/*
 * fs/fs-writeback.c
 *
 * Copyright (C) 2002, Linus Torvalds.
 *
 * Contains all the functions related to writing back and waiting
 * upon dirty inodes against superblocks, and writing back dirty
 * pages against inodes.  ie: data writeback.  Writeout of the
 * inode itself is not handled here.
 *
 * 10Apr2002    Andrew Morton
 *              Split out of fs/inode.c
 *              Additions for address_space-based writeback
 */

#include <linux/kernel.h>
#include <linux/export.h>
#include <linux/spinlock.h>
#include <linux/slab.h>
#include <linux/sched.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/kthread.h>
#include <linux/writeback.h>
#include <linux/blkdev.h>
#include <linux/backing-dev.h>
#include <linux/tracepoint.h>
#include <linux/device.h>
#include <linux/memcontrol.h>
#include "internal.h"

/*
 * 4MB minimal write chunk size
 */
#define MIN_WRITEBACK_PAGES	(4096UL >> (PAGE_SHIFT - 10))

struct wb_completion {
	atomic_t cnt;
};

/*
 * Passed into wb_writeback(), essentially a subset of writeback_control
 */
struct wb_writeback_work {
	long nr_pages;
	struct super_block *sb;
	unsigned long *older_than_this;
	enum writeback_sync_modes sync_mode;
	unsigned int tagged_writepages:1;
	unsigned int for_kupdate:1;
	unsigned int range_cyclic:1;
	unsigned int for_background:1;
	unsigned int for_sync:1;	/* sync(2) WB_SYNC_ALL writeback */
	unsigned int auto_free:1;	/* free on completion */
	enum wb_reason reason;		/* why was writeback initiated? */

	struct list_head list;		/* pending work list */
	struct wb_completion *done;	/* set if the caller waits */
};

/*
 * If one wants to wait for one or more wb_writeback_works, each work's
 * ->done should be set to a wb_completion defined using the following
 * macro.  Once all work items are issued with wb_queue_work(), the caller
 * can wait for the completion of all using wb_wait_for_completion().  Work
 * items which are waited upon aren't freed automatically on completion.
 */
#define DEFINE_WB_COMPLETION_ONSTACK(cmpl)				\
	struct wb_completion cmpl = {					\
		.cnt = ATOMIC_INIT(1),					\
	}

/*
 * If an inode is constantly having its pages dirtied, but then the
 * updates stop dirtytime_expire_interval seconds in the past, it's
 * possible for the worst case time between when an inode has its
 * timestamps updated and when they finally get written out to be two
 * dirtytime_expire_intervals.  We set the default to 12 hours (in
 * seconds), which means most of the time inodes will have their
 * timestamps written to disk after 12 hours, but in the worst case a
 * few inodes might not have their timestamps updated for 24 hours.
 */
unsigned int dirtytime_expire_interval = 12 * 60 * 60;

static inline struct inode *wb_inode(struct list_head *head)
{
	return list_entry(head, struct inode, i_io_list);
}

/*
 * Include the creation of the trace points after defining the
 * wb_writeback_work structure and inline functions so that the definition
 * remains local to this file.
 */
#define CREATE_TRACE_POINTS
#include <trace/events/writeback.h>

EXPORT_TRACEPOINT_SYMBOL_GPL(wbc_writepage);
101
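/*
 * Mark @wb as having dirty IO.  Returns %true iff the WB_has_dirty_io bit
 * was newly set, i.e. @wb just gained its first dirty inode; in that case
 * @wb's average write bandwidth is added to the bdi-wide total so that
 * later work can be split proportionally in wb_split_bdi_pages().
 */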
102static bool wb_io_lists_populated(struct bdi_writeback *wb)
103{
104 if (wb_has_dirty_io(wb)) {
105 return false;
106 } else {
107 set_bit(WB_has_dirty_io, &wb->state);
108 WARN_ON_ONCE(!wb->avg_write_bandwidth);
109 atomic_long_add(wb->avg_write_bandwidth,
110 &wb->bdi->tot_write_bandwidth);
111 return true;
112 }
113}
114
115static void wb_io_lists_depopulated(struct bdi_writeback *wb)
116{
117 if (wb_has_dirty_io(wb) && list_empty(&wb->b_dirty) &&
118 list_empty(&wb->b_io) && list_empty(&wb->b_more_io)) {
119 clear_bit(WB_has_dirty_io, &wb->state);
120 WARN_ON_ONCE(atomic_long_sub_return(wb->avg_write_bandwidth,
121 &wb->bdi->tot_write_bandwidth) < 0);
122 }
123}
124
/**
 * inode_io_list_move_locked - move an inode onto a bdi_writeback IO list
 * @inode: inode to be moved
 * @wb: target bdi_writeback
 * @head: one of @wb->b_{dirty|io|more_io|dirty_time}
 *
 * Move @inode->i_io_list to @head of @wb and set %WB_has_dirty_io.
 * Returns %true if @inode is the first occupant of the !dirty_time IO
 * lists; otherwise, %false.
 */
135static bool inode_io_list_move_locked(struct inode *inode,
136 struct bdi_writeback *wb,
137 struct list_head *head)
138{
139 assert_spin_locked(&wb->list_lock);
140
141 list_move(&inode->i_io_list, head);
142
143
144 if (head != &wb->b_dirty_time)
145 return wb_io_lists_populated(wb);
146
147 wb_io_lists_depopulated(wb);
148 return false;
149}
150
/**
 * inode_io_list_del_locked - remove an inode from its bdi_writeback IO list
 * @inode: inode to be removed
 * @wb: bdi_writeback @inode is being removed from
 *
 * Remove @inode which may be on one of @wb->b_{dirty|io|more_io} lists and
 * clear %WB_has_dirty_io if all IO lists are empty afterwards.
 */
159static void inode_io_list_del_locked(struct inode *inode,
160 struct bdi_writeback *wb)
161{
162 assert_spin_locked(&wb->list_lock);
163
164 list_del_init(&inode->i_io_list);
165 wb_io_lists_depopulated(wb);
166}
167
168static void wb_wakeup(struct bdi_writeback *wb)
169{
170 spin_lock_bh(&wb->work_lock);
171 if (test_bit(WB_registered, &wb->state))
172 mod_delayed_work(bdi_wq, &wb->dwork, 0);
173 spin_unlock_bh(&wb->work_lock);
174}
175
176static void finish_writeback_work(struct bdi_writeback *wb,
177 struct wb_writeback_work *work)
178{
179 struct wb_completion *done = work->done;
180
181 if (work->auto_free)
182 kfree(work);
183 if (done && atomic_dec_and_test(&done->cnt))
184 wake_up_all(&wb->bdi->wb_waitq);
185}
186
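/*
 * Queue @work on @wb and kick the writeback worker.  If @wb has already
 * been unregistered, the work is completed (and possibly freed) immediately
 * instead of being queued.
 */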
187static void wb_queue_work(struct bdi_writeback *wb,
188 struct wb_writeback_work *work)
189{
190 trace_writeback_queue(wb, work);
191
192 if (work->done)
193 atomic_inc(&work->done->cnt);
194
195 spin_lock_bh(&wb->work_lock);
196
197 if (test_bit(WB_registered, &wb->state)) {
198 list_add_tail(&work->list, &wb->work_list);
199 mod_delayed_work(bdi_wq, &wb->dwork, 0);
200 } else
201 finish_writeback_work(wb, work);
202
203 spin_unlock_bh(&wb->work_lock);
204}
205
/**
 * wb_wait_for_completion - wait for completion of bdi_writeback_works
 * @bdi: bdi work items were issued to
 * @done: target wb_completion
 *
 * Wait for one or more work items issued to @bdi with their ->done field
 * set to @done, which should have been defined with
 * DEFINE_WB_COMPLETION_ONSTACK().  This function returns after all such
 * work items are completed.  Work items which are waited upon aren't freed
 * automatically on completion.
 */
217static void wb_wait_for_completion(struct backing_dev_info *bdi,
218 struct wb_completion *done)
219{
220 atomic_dec(&done->cnt);
221 wait_event(bdi->wb_waitq, !atomic_read(&done->cnt));
222}
223
#ifdef CONFIG_CGROUP_WRITEBACK

/* parameters for foreign inode detection, see wbc_detach_inode() */
#define WB_FRN_TIME_SHIFT	13	/* 1s = 2^13, upto 8 secs w/ 16bit */
#define WB_FRN_TIME_AVG_SHIFT	3	/* avg = avg * 7/8 + new * 1/8 */
#define WB_FRN_TIME_CUT_DIV	2	/* ignore rounds < avg / 2 */
#define WB_FRN_TIME_PERIOD	(2 * (1 << WB_FRN_TIME_SHIFT))	/* 2s */

#define WB_FRN_HIST_SLOTS	16	/* inode->i_wb_frn_history is 16bit */
#define WB_FRN_HIST_UNIT	(WB_FRN_TIME_PERIOD / WB_FRN_HIST_SLOTS)
					/* each slot's duration is 2s / 16 */
#define WB_FRN_HIST_THR_SLOTS	(WB_FRN_HIST_SLOTS / 2)
					/* if foreign slots >= 8, switch */
#define WB_FRN_HIST_MAX_SLOTS	(WB_FRN_HIST_THR_SLOTS / 2 + 1)
					/* one round can affect upto 5 slots */

static atomic_t isw_nr_in_flight = ATOMIC_INIT(0);
static struct workqueue_struct *isw_wq;
242
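/**
 * __inode_attach_wb - associate an inode with its wb
 * @inode: inode of interest
 * @page: page being dirtied (may be NULL)
 *
 * If @inode doesn't have its wb, associate it with the wb matching the
 * memcg of @page or, if @page is NULL, %current.  May be called w/ or w/o
 * @inode->i_lock held.
 */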
243void __inode_attach_wb(struct inode *inode, struct page *page)
244{
245 struct backing_dev_info *bdi = inode_to_bdi(inode);
246 struct bdi_writeback *wb = NULL;
247
248 if (inode_cgwb_enabled(inode)) {
249 struct cgroup_subsys_state *memcg_css;
250
251 if (page) {
252 memcg_css = mem_cgroup_css_from_page(page);
253 wb = wb_get_create(bdi, memcg_css, GFP_ATOMIC);
254 } else {
255
256 memcg_css = task_get_css(current, memory_cgrp_id);
257 wb = wb_get_create(bdi, memcg_css, GFP_ATOMIC);
258 css_put(memcg_css);
259 }
260 }
261
262 if (!wb)
263 wb = &bdi->wb;
264
265
266
267
268
269 if (unlikely(cmpxchg(&inode->i_wb, NULL, wb)))
270 wb_put(wb);
271}
272
/**
 * locked_inode_to_wb_and_lock_list - determine a locked inode's wb and lock it
 * @inode: inode of interest with i_lock held
 *
 * Returns @inode's wb with its list_lock held.  @inode->i_lock must be
 * held on entry and is released on return.  The returned wb is guaranteed
 * to stay @inode's associated wb until its list_lock is released.
 */
281static struct bdi_writeback *
282locked_inode_to_wb_and_lock_list(struct inode *inode)
283 __releases(&inode->i_lock)
284 __acquires(&wb->list_lock)
285{
286 while (true) {
287 struct bdi_writeback *wb = inode_to_wb(inode);
288
289
290
291
292
293
294
295 wb_get(wb);
296 spin_unlock(&inode->i_lock);
297 spin_lock(&wb->list_lock);
298
299
300 if (likely(wb == inode->i_wb)) {
301 wb_put(wb);
302 return wb;
303 }
304
305 spin_unlock(&wb->list_lock);
306 wb_put(wb);
307 cpu_relax();
308 spin_lock(&inode->i_lock);
309 }
310}
311
/**
 * inode_to_wb_and_lock_list - determine an inode's wb and lock it
 * @inode: inode of interest
 *
 * Same as locked_inode_to_wb_and_lock_list() but @inode->i_lock isn't held
 * on entry.
 */
319static struct bdi_writeback *inode_to_wb_and_lock_list(struct inode *inode)
320 __acquires(&wb->list_lock)
321{
322 spin_lock(&inode->i_lock);
323 return locked_inode_to_wb_and_lock_list(inode);
324}
325
326struct inode_switch_wbs_context {
327 struct inode *inode;
328 struct bdi_writeback *new_wb;
329
330 struct rcu_head rcu_head;
331 struct work_struct work;
332};
333
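/*
 * Deferred body of an inode wb switch: transfer the WB_RECLAIMABLE and
 * WB_WRITEBACK stats for @isw->inode from the old wb to the new one, move
 * the inode onto the matching IO list and finally clear I_WB_SWITCH.
 */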
334static void inode_switch_wbs_work_fn(struct work_struct *work)
335{
336 struct inode_switch_wbs_context *isw =
337 container_of(work, struct inode_switch_wbs_context, work);
338 struct inode *inode = isw->inode;
339 struct address_space *mapping = inode->i_mapping;
340 struct bdi_writeback *old_wb = inode->i_wb;
341 struct bdi_writeback *new_wb = isw->new_wb;
342 struct radix_tree_iter iter;
343 bool switched = false;
344 void **slot;
	/*
	 * By the time control reaches here, the RCU grace period following
	 * the I_WB_SWITCH assertion has passed and all wb stat update
	 * transactions between unlocked_inode_to_wb_begin/end() are
	 * guaranteed to be synchronizing against the i_pages lock.
	 *
	 * Grabbing old_wb->list_lock, inode->i_lock and the i_pages lock
	 * gives us exclusion against all wb related operations on @inode,
	 * including IO list manipulations and stat updates.  The two
	 * list_locks are taken in ascending pointer order to keep the lock
	 * ordering consistent.
	 */
356 if (old_wb < new_wb) {
357 spin_lock(&old_wb->list_lock);
358 spin_lock_nested(&new_wb->list_lock, SINGLE_DEPTH_NESTING);
359 } else {
360 spin_lock(&new_wb->list_lock);
361 spin_lock_nested(&old_wb->list_lock, SINGLE_DEPTH_NESTING);
362 }
363 spin_lock(&inode->i_lock);
364 xa_lock_irq(&mapping->i_pages);
365
366
367
368
369
370 if (unlikely(inode->i_state & I_FREEING))
371 goto skip_switch;
372
373
374
375
376
377
378 radix_tree_for_each_tagged(slot, &mapping->i_pages, &iter, 0,
379 PAGECACHE_TAG_DIRTY) {
380 struct page *page = radix_tree_deref_slot_protected(slot,
381 &mapping->i_pages.xa_lock);
382 if (likely(page) && PageDirty(page)) {
383 dec_wb_stat(old_wb, WB_RECLAIMABLE);
384 inc_wb_stat(new_wb, WB_RECLAIMABLE);
385 }
386 }
387
388 radix_tree_for_each_tagged(slot, &mapping->i_pages, &iter, 0,
389 PAGECACHE_TAG_WRITEBACK) {
390 struct page *page = radix_tree_deref_slot_protected(slot,
391 &mapping->i_pages.xa_lock);
392 if (likely(page)) {
393 WARN_ON_ONCE(!PageWriteback(page));
394 dec_wb_stat(old_wb, WB_WRITEBACK);
395 inc_wb_stat(new_wb, WB_WRITEBACK);
396 }
397 }
398
399 wb_get(new_wb);
400
401
402
403
404
405
406
407 if (!list_empty(&inode->i_io_list)) {
408 struct inode *pos;
409
410 inode_io_list_del_locked(inode, old_wb);
411 inode->i_wb = new_wb;
412 list_for_each_entry(pos, &new_wb->b_dirty, i_io_list)
413 if (time_after_eq(inode->dirtied_when,
414 pos->dirtied_when))
415 break;
416 inode_io_list_move_locked(inode, new_wb, pos->i_io_list.prev);
417 } else {
418 inode->i_wb = new_wb;
419 }
420
421
422 inode->i_wb_frn_winner = 0;
423 inode->i_wb_frn_avg_time = 0;
424 inode->i_wb_frn_history = 0;
425 switched = true;
426skip_switch:
427
428
429
430
431 smp_store_release(&inode->i_state, inode->i_state & ~I_WB_SWITCH);
432
433 xa_unlock_irq(&mapping->i_pages);
434 spin_unlock(&inode->i_lock);
435 spin_unlock(&new_wb->list_lock);
436 spin_unlock(&old_wb->list_lock);
437
438 if (switched) {
439 wb_wakeup(new_wb);
440 wb_put(old_wb);
441 }
442 wb_put(new_wb);
443
444 iput(inode);
445 kfree(isw);
446
447 atomic_dec(&isw_nr_in_flight);
448}
449
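/*
 * RCU callback scheduled by inode_switch_wbs().  Running after a grace
 * period guarantees that all stat update transactions which started before
 * I_WB_SWITCH became visible have finished; the actual switch is then
 * punted to process context through isw_wq.
 */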
450static void inode_switch_wbs_rcu_fn(struct rcu_head *rcu_head)
451{
452 struct inode_switch_wbs_context *isw = container_of(rcu_head,
453 struct inode_switch_wbs_context, rcu_head);
454
455
456 INIT_WORK(&isw->work, inode_switch_wbs_work_fn);
457 queue_work(isw_wq, &isw->work);
458}
459
/**
 * inode_switch_wbs - change the wb association of an inode
 * @inode: target inode
 * @new_wb_id: ID of the new wb
 *
 * Switch @inode's wb association to the wb identified by @new_wb_id.  The
 * switching is performed asynchronously and may fail silently.
 */
468static void inode_switch_wbs(struct inode *inode, int new_wb_id)
469{
470 struct backing_dev_info *bdi = inode_to_bdi(inode);
471 struct cgroup_subsys_state *memcg_css;
472 struct inode_switch_wbs_context *isw;
473
474
475 if (inode->i_state & I_WB_SWITCH)
476 return;
477
478 isw = kzalloc(sizeof(*isw), GFP_ATOMIC);
479 if (!isw)
480 return;
481
482
483 rcu_read_lock();
484 memcg_css = css_from_id(new_wb_id, &memory_cgrp_subsys);
485 if (memcg_css)
486 isw->new_wb = wb_get_create(bdi, memcg_css, GFP_ATOMIC);
487 rcu_read_unlock();
488 if (!isw->new_wb)
489 goto out_free;
490
491
492 spin_lock(&inode->i_lock);
493 if (!(inode->i_sb->s_flags & SB_ACTIVE) ||
494 inode->i_state & (I_WB_SWITCH | I_FREEING) ||
495 inode_to_wb(inode) == isw->new_wb) {
496 spin_unlock(&inode->i_lock);
497 goto out_free;
498 }
499 inode->i_state |= I_WB_SWITCH;
500 __iget(inode);
501 spin_unlock(&inode->i_lock);
502
503 isw->inode = inode;
504
505 atomic_inc(&isw_nr_in_flight);
506
507
508
509
510
511
512
513 call_rcu(&isw->rcu_head, inode_switch_wbs_rcu_fn);
514 return;
515
516out_free:
517 if (isw->new_wb)
518 wb_put(isw->new_wb);
519 kfree(isw);
520}
521
/**
 * wbc_attach_and_unlock_inode - associate wbc with target inode and unlock it
 * @wbc: writeback_control of interest
 * @inode: target inode
 *
 * @inode is locked and about to be written back under the control of @wbc.
 * Record @inode's writeback context into @wbc and unlock the i_lock.  On
 * writeback completion, wbc_detach_inode() should be called.  This is used
 * to track the cgroup writeback context.
 */
532void wbc_attach_and_unlock_inode(struct writeback_control *wbc,
533 struct inode *inode)
534{
535 if (!inode_cgwb_enabled(inode)) {
536 spin_unlock(&inode->i_lock);
537 return;
538 }
539
540 wbc->wb = inode_to_wb(inode);
541 wbc->inode = inode;
542
543 wbc->wb_id = wbc->wb->memcg_css->id;
544 wbc->wb_lcand_id = inode->i_wb_frn_winner;
545 wbc->wb_tcand_id = 0;
546 wbc->wb_bytes = 0;
547 wbc->wb_lcand_bytes = 0;
548 wbc->wb_tcand_bytes = 0;
549
550 wb_get(wbc->wb);
551 spin_unlock(&inode->i_lock);
552
553
554
555
556
557 if (unlikely(wb_dying(wbc->wb)))
558 inode_switch_wbs(inode, wbc->wb_id);
559}
560
/**
 * wbc_detach_inode - disassociate wbc from inode and perform foreign detection
 * @wbc: writeback_control of the just finished writeback
 *
 * To be called after a writeback attempt of an inode finishes and undoes
 * wbc_attach_and_unlock_inode().  Can be called under any context.
 *
 * As concurrent write sharing of an inode is expected to be very rare and
 * memcg only tracks page ownership on a first-use basis, severely confining
 * the usefulness of such sharing, cgroup writeback tracks ownership
 * per-inode.  While support for concurrent write sharing of an inode is
 * deemed unnecessary, an inode being written to by different cgroups at
 * different points in time is a lot more common, and, more importantly,
 * charging only by first-use can too readily lead to grossly incorrect
 * behaviors (a single foreign page can lead to gigabytes of writeback
 * being incorrectly attributed).
 *
 * To resolve this issue, cgroup writeback detects the majority dirtier of
 * an inode and transfers the ownership to it.  To avoid unnecessary
 * oscillation, the detection mechanism keeps track of history and gives
 * out the switch verdict only if the foreign usage pattern is stable over
 * a certain amount of time and/or writeback attempts.
 *
 * On each writeback attempt, @wbc tries to detect the majority writer
 * using the Boyer-Moore majority vote algorithm.  In addition to the byte
 * count from the majority voting, it also counts the bytes written for the
 * current wb and the last round's winner wb (max of last round's current
 * wb, the winner from two rounds ago, and the last round's majority
 * candidate).  Keeping track of the historical winner helps the algorithm
 * to semi-reliably detect the most active writer even when it's not the
 * absolute majority.
 *
 * Once the winner of the round is determined, whether the winner is
 * foreign or not and how much IO time the round consumed is recorded in
 * inode->i_wb_frn_history.  If the amount of recorded foreign IO time is
 * larger than a certain amount, the switch verdict is given.
 */
598void wbc_detach_inode(struct writeback_control *wbc)
599{
600 struct bdi_writeback *wb = wbc->wb;
601 struct inode *inode = wbc->inode;
602 unsigned long avg_time, max_bytes, max_time;
603 u16 history;
604 int max_id;
605
606 if (!wb)
607 return;
608
609 history = inode->i_wb_frn_history;
610 avg_time = inode->i_wb_frn_avg_time;
611
612
613 if (wbc->wb_bytes >= wbc->wb_lcand_bytes &&
614 wbc->wb_bytes >= wbc->wb_tcand_bytes) {
615 max_id = wbc->wb_id;
616 max_bytes = wbc->wb_bytes;
617 } else if (wbc->wb_lcand_bytes >= wbc->wb_tcand_bytes) {
618 max_id = wbc->wb_lcand_id;
619 max_bytes = wbc->wb_lcand_bytes;
620 } else {
621 max_id = wbc->wb_tcand_id;
622 max_bytes = wbc->wb_tcand_bytes;
623 }
624
625
626
627
628
629
630
631
632 max_time = DIV_ROUND_UP((max_bytes >> PAGE_SHIFT) << WB_FRN_TIME_SHIFT,
633 wb->avg_write_bandwidth);
634 if (avg_time)
635 avg_time += (max_time >> WB_FRN_TIME_AVG_SHIFT) -
636 (avg_time >> WB_FRN_TIME_AVG_SHIFT);
637 else
638 avg_time = max_time;
639
640 if (max_time >= avg_time / WB_FRN_TIME_CUT_DIV) {
641 int slots;
642
643
644
645
646
647
648
649
650
651 slots = min(DIV_ROUND_UP(max_time, WB_FRN_HIST_UNIT),
652 (unsigned long)WB_FRN_HIST_MAX_SLOTS);
653 history <<= slots;
654 if (wbc->wb_id != max_id)
655 history |= (1U << slots) - 1;
656
657
658
659
660
661
662
663
664 if (hweight32(history) > WB_FRN_HIST_THR_SLOTS)
665 inode_switch_wbs(inode, max_id);
666 }
667
668
669
670
671
672 inode->i_wb_frn_winner = max_id;
673 inode->i_wb_frn_avg_time = min(avg_time, (unsigned long)U16_MAX);
674 inode->i_wb_frn_history = history;
675
676 wb_put(wbc->wb);
677 wbc->wb = NULL;
678}
679
/**
 * wbc_account_io - account IO issued during writeback
 * @wbc: writeback_control of the writeback in progress
 * @page: page being written out
 * @bytes: number of bytes being written out
 *
 * @bytes from @page are about to be written out during the writeback
 * controlled by @wbc.  Keep the book for foreign inode detection.  See
 * wbc_detach_inode().
 */
690void wbc_account_io(struct writeback_control *wbc, struct page *page,
691 size_t bytes)
692{
693 int id;
694
695
696
697
698
699
700
701 if (!wbc->wb)
702 return;
703
704 id = mem_cgroup_css_from_page(page)->id;
705
706 if (id == wbc->wb_id) {
707 wbc->wb_bytes += bytes;
708 return;
709 }
710
711 if (id == wbc->wb_lcand_id)
712 wbc->wb_lcand_bytes += bytes;
713
714
715 if (!wbc->wb_tcand_bytes)
716 wbc->wb_tcand_id = id;
717 if (id == wbc->wb_tcand_id)
718 wbc->wb_tcand_bytes += bytes;
719 else
720 wbc->wb_tcand_bytes -= min(bytes, wbc->wb_tcand_bytes);
721}
722EXPORT_SYMBOL_GPL(wbc_account_io);
723
/**
 * inode_congested - test whether an inode is congested
 * @inode: inode to test for congestion (may be NULL)
 * @cong_bits: mask of WB_[a]sync_congested bits to test
 *
 * Tests whether @inode is congested.  @cong_bits is the mask of congestion
 * bits to test and the return value is the mask of set bits.
 *
 * If cgroup writeback is enabled for @inode, the congestion state is
 * determined by whether the cgwb (cgroup bdi_writeback) for the memcg
 * associated with @inode is congested; otherwise, the root wb's congestion
 * state is used.
 *
 * @inode is allowed to be NULL as this function is often called on
 * mapping->host which is NULL for the swapper space.
 */
740int inode_congested(struct inode *inode, int cong_bits)
741{
742
743
744
745
746 if (inode && inode_to_wb_is_valid(inode)) {
747 struct bdi_writeback *wb;
748 struct wb_lock_cookie lock_cookie = {};
749 bool congested;
750
751 wb = unlocked_inode_to_wb_begin(inode, &lock_cookie);
752 congested = wb_congested(wb, cong_bits);
753 unlocked_inode_to_wb_end(inode, &lock_cookie);
754 return congested;
755 }
756
757 return wb_congested(&inode_to_bdi(inode)->wb, cong_bits);
758}
759EXPORT_SYMBOL_GPL(inode_congested);
760
/**
 * wb_split_bdi_pages - split nr_pages to write according to bandwidth
 * @wb: target bdi_writeback to split @nr_pages to
 * @nr_pages: number of pages to write for the whole bdi
 *
 * Split @wb's portion of @nr_pages according to @wb's write bandwidth in
 * relation to the total write bandwidth of all wb's w/ dirty inodes on
 * @wb->bdi.
 */
770static long wb_split_bdi_pages(struct bdi_writeback *wb, long nr_pages)
771{
772 unsigned long this_bw = wb->avg_write_bandwidth;
773 unsigned long tot_bw = atomic_long_read(&wb->bdi->tot_write_bandwidth);
774
775 if (nr_pages == LONG_MAX)
776 return LONG_MAX;
777
778
779
780
781
782
783 if (!tot_bw || this_bw >= tot_bw)
784 return nr_pages;
785 else
786 return DIV_ROUND_UP_ULL((u64)nr_pages * this_bw, tot_bw);
787}
788
/**
 * bdi_split_work_to_wbs - split a wb_writeback_work to all wb's of a bdi
 * @bdi: target backing_dev_info
 * @base_work: wb_writeback_work to issue
 * @skip_if_busy: skip wb's which already have writeback in progress
 *
 * Split and issue @base_work to all wb's (bdi_writeback's) of @bdi which
 * have dirty inodes.  If @base_work->nr_pages isn't %LONG_MAX, it's
 * distributed to the busy wbs according to each wb's proportion in the
 * total active write bandwidth of @bdi.
 */
800static void bdi_split_work_to_wbs(struct backing_dev_info *bdi,
801 struct wb_writeback_work *base_work,
802 bool skip_if_busy)
803{
804 struct bdi_writeback *last_wb = NULL;
805 struct bdi_writeback *wb = list_entry(&bdi->wb_list,
806 struct bdi_writeback, bdi_node);
807
808 might_sleep();
809restart:
810 rcu_read_lock();
811 list_for_each_entry_continue_rcu(wb, &bdi->wb_list, bdi_node) {
812 DEFINE_WB_COMPLETION_ONSTACK(fallback_work_done);
813 struct wb_writeback_work fallback_work;
814 struct wb_writeback_work *work;
815 long nr_pages;
816
817 if (last_wb) {
818 wb_put(last_wb);
819 last_wb = NULL;
820 }
821
822
823 if (!wb_has_dirty_io(wb) &&
824 (base_work->sync_mode == WB_SYNC_NONE ||
825 list_empty(&wb->b_dirty_time)))
826 continue;
827 if (skip_if_busy && writeback_in_progress(wb))
828 continue;
829
830 nr_pages = wb_split_bdi_pages(wb, base_work->nr_pages);
831
832 work = kmalloc(sizeof(*work), GFP_ATOMIC);
833 if (work) {
834 *work = *base_work;
835 work->nr_pages = nr_pages;
836 work->auto_free = 1;
837 wb_queue_work(wb, work);
838 continue;
839 }
840
841
842 work = &fallback_work;
843 *work = *base_work;
844 work->nr_pages = nr_pages;
845 work->auto_free = 0;
846 work->done = &fallback_work_done;
847
848 wb_queue_work(wb, work);
849
850
851
852
853
854
855 wb_get(wb);
856 last_wb = wb;
857
858 rcu_read_unlock();
859 wb_wait_for_completion(bdi, &fallback_work_done);
860 goto restart;
861 }
862 rcu_read_unlock();
863
864 if (last_wb)
865 wb_put(last_wb);
866}
867
/**
 * cgroup_writeback_umount - flush inode wb switches for umount
 *
 * This function is called when a super_block is about to be destroyed and
 * flushes in-flight inode wb switches.  An inode wb switch goes through
 * RCU and then workqueue, so the two need to be flushed in order to ensure
 * that all previously scheduled switches are finished.  As wb switches are
 * rare occurrences and synchronize_rcu() can take a while, perform
 * flushing iff wb switches are in flight.
 */
878void cgroup_writeback_umount(void)
879{
880 if (atomic_read(&isw_nr_in_flight)) {
881 synchronize_rcu();
882 flush_workqueue(isw_wq);
883 }
884}
885
886static int __init cgroup_writeback_init(void)
887{
888 isw_wq = alloc_workqueue("inode_switch_wbs", 0, 0);
889 if (!isw_wq)
890 return -ENOMEM;
891 return 0;
892}
893fs_initcall(cgroup_writeback_init);
894
895#else
896
897static struct bdi_writeback *
898locked_inode_to_wb_and_lock_list(struct inode *inode)
899 __releases(&inode->i_lock)
900 __acquires(&wb->list_lock)
901{
902 struct bdi_writeback *wb = inode_to_wb(inode);
903
904 spin_unlock(&inode->i_lock);
905 spin_lock(&wb->list_lock);
906 return wb;
907}
908
909static struct bdi_writeback *inode_to_wb_and_lock_list(struct inode *inode)
910 __acquires(&wb->list_lock)
911{
912 struct bdi_writeback *wb = inode_to_wb(inode);
913
914 spin_lock(&wb->list_lock);
915 return wb;
916}
917
918static long wb_split_bdi_pages(struct bdi_writeback *wb, long nr_pages)
919{
920 return nr_pages;
921}
922
923static void bdi_split_work_to_wbs(struct backing_dev_info *bdi,
924 struct wb_writeback_work *base_work,
925 bool skip_if_busy)
926{
927 might_sleep();
928
929 if (!skip_if_busy || !writeback_in_progress(&bdi->wb)) {
930 base_work->auto_free = 0;
931 wb_queue_work(&bdi->wb, base_work);
932 }
933}
934
935#endif
936
937
938
939
940
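/*
 * Add in the number of potentially dirty inodes, because each inode
 * write can dirty pagecache in the underlying blockdev.
 */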
941static unsigned long get_nr_dirty_pages(void)
942{
943 return global_node_page_state(NR_FILE_DIRTY) +
944 global_node_page_state(NR_UNSTABLE_NFS) +
945 get_nr_dirty_inodes();
946}
947
948static void wb_start_writeback(struct bdi_writeback *wb, enum wb_reason reason)
949{
950 if (!wb_has_dirty_io(wb))
951 return;
952
953
954
955
956
957
958
959
960
961 if (test_bit(WB_start_all, &wb->state) ||
962 test_and_set_bit(WB_start_all, &wb->state))
963 return;
964
965 wb->start_all_reason = reason;
966 wb_wakeup(wb);
967}
968
969
970
971
972
973
974
975
976
977
978
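/**
 * wb_start_background_writeback - start background writeback
 * @wb: bdi_writeback to write from
 *
 * Description:
 *   This makes sure WB_SYNC_NONE background writeback happens.  When this
 *   function returns, it is only guaranteed that for the given wb some IO
 *   is happening if we are over the background dirty threshold.  The
 *   caller need not hold the sb s_umount semaphore.
 */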
979void wb_start_background_writeback(struct bdi_writeback *wb)
980{
981
982
983
984
985 trace_writeback_wake_background(wb);
986 wb_wakeup(wb);
987}
988
989
990
991
992void inode_io_list_del(struct inode *inode)
993{
994 struct bdi_writeback *wb;
995
996 wb = inode_to_wb_and_lock_list(inode);
997 inode_io_list_del_locked(inode, wb);
998 spin_unlock(&wb->list_lock);
999}
1000
1001
1002
1003
1004void sb_mark_inode_writeback(struct inode *inode)
1005{
1006 struct super_block *sb = inode->i_sb;
1007 unsigned long flags;
1008
1009 if (list_empty(&inode->i_wb_list)) {
1010 spin_lock_irqsave(&sb->s_inode_wblist_lock, flags);
1011 if (list_empty(&inode->i_wb_list)) {
1012 list_add_tail(&inode->i_wb_list, &sb->s_inodes_wb);
1013 trace_sb_mark_inode_writeback(inode);
1014 }
1015 spin_unlock_irqrestore(&sb->s_inode_wblist_lock, flags);
1016 }
1017}
1018
1019
1020
1021
1022void sb_clear_inode_writeback(struct inode *inode)
1023{
1024 struct super_block *sb = inode->i_sb;
1025 unsigned long flags;
1026
1027 if (!list_empty(&inode->i_wb_list)) {
1028 spin_lock_irqsave(&sb->s_inode_wblist_lock, flags);
1029 if (!list_empty(&inode->i_wb_list)) {
1030 list_del_init(&inode->i_wb_list);
1031 trace_sb_clear_inode_writeback(inode);
1032 }
1033 spin_unlock_irqrestore(&sb->s_inode_wblist_lock, flags);
1034 }
1035}
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046static void redirty_tail(struct inode *inode, struct bdi_writeback *wb)
1047{
1048 if (!list_empty(&wb->b_dirty)) {
1049 struct inode *tail;
1050
1051 tail = wb_inode(wb->b_dirty.next);
1052 if (time_before(inode->dirtied_when, tail->dirtied_when))
1053 inode->dirtied_when = jiffies;
1054 }
1055 inode_io_list_move_locked(inode, wb, &wb->b_dirty);
1056}
1057
1058
1059
1060
1061static void requeue_io(struct inode *inode, struct bdi_writeback *wb)
1062{
1063 inode_io_list_move_locked(inode, wb, &wb->b_more_io);
1064}
1065
1066static void inode_sync_complete(struct inode *inode)
1067{
1068 inode->i_state &= ~I_SYNC;
1069
1070 inode_add_lru(inode);
1071
1072 smp_mb();
1073 wake_up_bit(&inode->i_state, __I_SYNC);
1074}
1075
1076static bool inode_dirtied_after(struct inode *inode, unsigned long t)
1077{
1078 bool ret = time_after(inode->dirtied_when, t);
1079#ifndef CONFIG_64BIT
1080
1081
1082
1083
1084
1085
1086 ret = ret && time_before_eq(inode->dirtied_when, jiffies);
1087#endif
1088 return ret;
1089}
1090
1091#define EXPIRE_DIRTY_ATIME 0x0001
1092
1093
1094
1095
1096
1097static int move_expired_inodes(struct list_head *delaying_queue,
1098 struct list_head *dispatch_queue,
1099 int flags,
1100 struct wb_writeback_work *work)
1101{
1102 unsigned long *older_than_this = NULL;
1103 unsigned long expire_time;
1104 LIST_HEAD(tmp);
1105 struct list_head *pos, *node;
1106 struct super_block *sb = NULL;
1107 struct inode *inode;
1108 int do_sb_sort = 0;
1109 int moved = 0;
1110
1111 if ((flags & EXPIRE_DIRTY_ATIME) == 0)
1112 older_than_this = work->older_than_this;
1113 else if (!work->for_sync) {
1114 expire_time = jiffies - (dirtytime_expire_interval * HZ);
1115 older_than_this = &expire_time;
1116 }
1117 while (!list_empty(delaying_queue)) {
1118 inode = wb_inode(delaying_queue->prev);
1119 if (older_than_this &&
1120 inode_dirtied_after(inode, *older_than_this))
1121 break;
1122 list_move(&inode->i_io_list, &tmp);
1123 moved++;
1124 if (flags & EXPIRE_DIRTY_ATIME)
1125 set_bit(__I_DIRTY_TIME_EXPIRED, &inode->i_state);
1126 if (sb_is_blkdev_sb(inode->i_sb))
1127 continue;
1128 if (sb && sb != inode->i_sb)
1129 do_sb_sort = 1;
1130 sb = inode->i_sb;
1131 }
1132
1133
1134 if (!do_sb_sort) {
1135 list_splice(&tmp, dispatch_queue);
1136 goto out;
1137 }
1138
1139
1140 while (!list_empty(&tmp)) {
1141 sb = wb_inode(tmp.prev)->i_sb;
1142 list_for_each_prev_safe(pos, node, &tmp) {
1143 inode = wb_inode(pos);
1144 if (inode->i_sb == sb)
1145 list_move(&inode->i_io_list, dispatch_queue);
1146 }
1147 }
1148out:
1149 return moved;
1150}
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
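/*
 * Queue all expired dirty inodes for io, eldest first.
 * Before
 *         newly dirtied     b_dirty    b_io    b_more_io
 *         =============>    gf         edc     BA
 * After
 *         newly dirtied     b_dirty    b_io    b_more_io
 *         =============>    g          fBAedc
 *                                                   |
 *                                                   +--> dequeue for IO
 */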
1163static void queue_io(struct bdi_writeback *wb, struct wb_writeback_work *work)
1164{
1165 int moved;
1166
1167 assert_spin_locked(&wb->list_lock);
1168 list_splice_init(&wb->b_more_io, &wb->b_io);
1169 moved = move_expired_inodes(&wb->b_dirty, &wb->b_io, 0, work);
1170 moved += move_expired_inodes(&wb->b_dirty_time, &wb->b_io,
1171 EXPIRE_DIRTY_ATIME, work);
1172 if (moved)
1173 wb_io_lists_populated(wb);
1174 trace_writeback_queue_io(wb, work, moved);
1175}
1176
1177static int write_inode(struct inode *inode, struct writeback_control *wbc)
1178{
1179 int ret;
1180
1181 if (inode->i_sb->s_op->write_inode && !is_bad_inode(inode)) {
1182 trace_writeback_write_inode_start(inode, wbc);
1183 ret = inode->i_sb->s_op->write_inode(inode, wbc);
1184 trace_writeback_write_inode(inode, wbc);
1185 return ret;
1186 }
1187 return 0;
1188}
1189
1190
1191
1192
1193
1194static void __inode_wait_for_writeback(struct inode *inode)
1195 __releases(inode->i_lock)
1196 __acquires(inode->i_lock)
1197{
1198 DEFINE_WAIT_BIT(wq, &inode->i_state, __I_SYNC);
1199 wait_queue_head_t *wqh;
1200
1201 wqh = bit_waitqueue(&inode->i_state, __I_SYNC);
1202 while (inode->i_state & I_SYNC) {
1203 spin_unlock(&inode->i_lock);
1204 __wait_on_bit(wqh, &wq, bit_wait,
1205 TASK_UNINTERRUPTIBLE);
1206 spin_lock(&inode->i_lock);
1207 }
1208}
1209
1210
1211
1212
1213void inode_wait_for_writeback(struct inode *inode)
1214{
1215 spin_lock(&inode->i_lock);
1216 __inode_wait_for_writeback(inode);
1217 spin_unlock(&inode->i_lock);
1218}
1219
1220
1221
1222
1223
1224
1225static void inode_sleep_on_writeback(struct inode *inode)
1226 __releases(inode->i_lock)
1227{
1228 DEFINE_WAIT(wait);
1229 wait_queue_head_t *wqh = bit_waitqueue(&inode->i_state, __I_SYNC);
1230 int sleep;
1231
1232 prepare_to_wait(wqh, &wait, TASK_UNINTERRUPTIBLE);
1233 sleep = inode->i_state & I_SYNC;
1234 spin_unlock(&inode->i_lock);
1235 if (sleep)
1236 schedule();
1237 finish_wait(wqh, &wait);
1238}
1239
1240
1241
1242
1243
1244
1245
1246
1247
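/*
 * Find the proper writeback list for the inode depending on its current
 * state and whether the writeback in progress was interrupted (nr_to_write
 * ran out or pages got skipped), and requeue it there.
 */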
1248static void requeue_inode(struct inode *inode, struct bdi_writeback *wb,
1249 struct writeback_control *wbc)
1250{
1251 if (inode->i_state & I_FREEING)
1252 return;
1253
1254
1255
1256
1257
1258
1259 if ((inode->i_state & I_DIRTY) &&
1260 (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages))
1261 inode->dirtied_when = jiffies;
1262
1263 if (wbc->pages_skipped) {
1264
1265
1266
1267
1268 redirty_tail(inode, wb);
1269 return;
1270 }
1271
1272 if (mapping_tagged(inode->i_mapping, PAGECACHE_TAG_DIRTY)) {
1273
1274
1275
1276
1277 if (wbc->nr_to_write <= 0) {
1278
1279 requeue_io(inode, wb);
1280 } else {
1281
1282
1283
1284
1285
1286
1287
1288 redirty_tail(inode, wb);
1289 }
1290 } else if (inode->i_state & I_DIRTY) {
1291
1292
1293
1294
1295
1296 redirty_tail(inode, wb);
1297 } else if (inode->i_state & I_DIRTY_TIME) {
1298 inode->dirtied_when = jiffies;
1299 inode_io_list_move_locked(inode, wb, &wb->b_dirty_time);
1300 } else {
1301
1302 inode_io_list_del_locked(inode, wb);
1303 }
1304}
1305
1306
1307
1308
1309
1310
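/*
 * Write out an inode and its dirty pages.  Do not update the writeback list
 * linkage.  That is left to the caller.  The caller is also responsible for
 * setting the I_SYNC flag and calling inode_sync_complete() to clear it.
 */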
1311static int
1312__writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
1313{
1314 struct address_space *mapping = inode->i_mapping;
1315 long nr_to_write = wbc->nr_to_write;
1316 unsigned dirty;
1317 int ret;
1318
1319 WARN_ON(!(inode->i_state & I_SYNC));
1320
1321 trace_writeback_single_inode_start(inode, wbc, nr_to_write);
1322
1323 ret = do_writepages(mapping, wbc);
1324
1325
1326
1327
1328
1329
1330
1331
1332 if (wbc->sync_mode == WB_SYNC_ALL && !wbc->for_sync) {
1333 int err = filemap_fdatawait(mapping);
1334 if (ret == 0)
1335 ret = err;
1336 }
1337
1338
1339
1340
1341
1342
1343 spin_lock(&inode->i_lock);
1344
1345 dirty = inode->i_state & I_DIRTY;
1346 if (inode->i_state & I_DIRTY_TIME) {
1347 if ((dirty & I_DIRTY_INODE) ||
1348 wbc->sync_mode == WB_SYNC_ALL ||
1349 unlikely(inode->i_state & I_DIRTY_TIME_EXPIRED) ||
1350 unlikely(time_after(jiffies,
1351 (inode->dirtied_time_when +
1352 dirtytime_expire_interval * HZ)))) {
1353 dirty |= I_DIRTY_TIME | I_DIRTY_TIME_EXPIRED;
1354 trace_writeback_lazytime(inode);
1355 }
1356 } else
1357 inode->i_state &= ~I_DIRTY_TIME_EXPIRED;
1358 inode->i_state &= ~dirty;
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371 smp_mb();
1372
1373 if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY))
1374 inode->i_state |= I_DIRTY_PAGES;
1375
1376 spin_unlock(&inode->i_lock);
1377
1378 if (dirty & I_DIRTY_TIME)
1379 mark_inode_dirty_sync(inode);
1380
1381 if (dirty & ~I_DIRTY_PAGES) {
1382 int err = write_inode(inode, wbc);
1383 if (ret == 0)
1384 ret = err;
1385 }
1386 trace_writeback_single_inode(inode, wbc, nr_to_write);
1387 return ret;
1388}
1389
1390
1391
1392
1393
1394
1395
1396
1397
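/*
 * Write out an inode's dirty pages.  Either the caller has an active
 * reference on the inode or the inode has I_WILL_FREE set.
 *
 * This function is meant for writing back a single inode, e.g. on behalf of
 * the filesystem; the flusher thread uses __writeback_single_inode() and
 * does the more involved IO list handling in writeback_sb_inodes().
 */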
1398static int writeback_single_inode(struct inode *inode,
1399 struct writeback_control *wbc)
1400{
1401 struct bdi_writeback *wb;
1402 int ret = 0;
1403
1404 spin_lock(&inode->i_lock);
1405 if (!atomic_read(&inode->i_count))
1406 WARN_ON(!(inode->i_state & (I_WILL_FREE|I_FREEING)));
1407 else
1408 WARN_ON(inode->i_state & I_WILL_FREE);
1409
1410 if (inode->i_state & I_SYNC) {
1411 if (wbc->sync_mode != WB_SYNC_ALL)
1412 goto out;
1413
1414
1415
1416
1417
1418 __inode_wait_for_writeback(inode);
1419 }
1420 WARN_ON(inode->i_state & I_SYNC);
1421
1422
1423
1424
1425
1426
1427
1428
1429 if (!(inode->i_state & I_DIRTY_ALL) &&
1430 (wbc->sync_mode != WB_SYNC_ALL ||
1431 !mapping_tagged(inode->i_mapping, PAGECACHE_TAG_WRITEBACK)))
1432 goto out;
1433 inode->i_state |= I_SYNC;
1434 wbc_attach_and_unlock_inode(wbc, inode);
1435
1436 ret = __writeback_single_inode(inode, wbc);
1437
1438 wbc_detach_inode(wbc);
1439
1440 wb = inode_to_wb_and_lock_list(inode);
1441 spin_lock(&inode->i_lock);
1442
1443
1444
1445
1446 if (!(inode->i_state & I_DIRTY_ALL))
1447 inode_io_list_del_locked(inode, wb);
1448 spin_unlock(&wb->list_lock);
1449 inode_sync_complete(inode);
1450out:
1451 spin_unlock(&inode->i_lock);
1452 return ret;
1453}
1454
1455static long writeback_chunk_size(struct bdi_writeback *wb,
1456 struct wb_writeback_work *work)
1457{
1458 long pages;
1459
1460
1461
1462
1463
1464
1465
1466
1467
1468
1469
1470
1471
1472
1473 if (work->sync_mode == WB_SYNC_ALL || work->tagged_writepages)
1474 pages = LONG_MAX;
1475 else {
1476 pages = min(wb->avg_write_bandwidth / 2,
1477 global_wb_domain.dirty_limit / DIRTY_SCOPE);
1478 pages = min(pages, work->nr_pages);
1479 pages = round_down(pages + MIN_WRITEBACK_PAGES,
1480 MIN_WRITEBACK_PAGES);
1481 }
1482
1483 return pages;
1484}
1485
1486
1487
1488
1489
1490
1491
1492
1493
1494
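/*
 * Write a portion of b_io inodes which belong to @sb.
 *
 * Return the number of pages and/or inodes written.
 *
 * NOTE! This is called with wb->list_lock held, and will
 * unlock and relock that for each inode it ends up doing
 * IO for.
 */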
1495static long writeback_sb_inodes(struct super_block *sb,
1496 struct bdi_writeback *wb,
1497 struct wb_writeback_work *work)
1498{
1499 struct writeback_control wbc = {
1500 .sync_mode = work->sync_mode,
1501 .tagged_writepages = work->tagged_writepages,
1502 .for_kupdate = work->for_kupdate,
1503 .for_background = work->for_background,
1504 .for_sync = work->for_sync,
1505 .range_cyclic = work->range_cyclic,
1506 .range_start = 0,
1507 .range_end = LLONG_MAX,
1508 };
1509 unsigned long start_time = jiffies;
1510 long write_chunk;
1511 long wrote = 0;
1512
1513 while (!list_empty(&wb->b_io)) {
1514 struct inode *inode = wb_inode(wb->b_io.prev);
1515 struct bdi_writeback *tmp_wb;
1516
1517 if (inode->i_sb != sb) {
1518 if (work->sb) {
1519
1520
1521
1522
1523
1524 redirty_tail(inode, wb);
1525 continue;
1526 }
1527
1528
1529
1530
1531
1532
1533 break;
1534 }
1535
1536
1537
1538
1539
1540
1541 spin_lock(&inode->i_lock);
1542 if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) {
1543 spin_unlock(&inode->i_lock);
1544 redirty_tail(inode, wb);
1545 continue;
1546 }
1547 if ((inode->i_state & I_SYNC) && wbc.sync_mode != WB_SYNC_ALL) {
1548
1549
1550
1551
1552
1553
1554
1555
1556
1557 spin_unlock(&inode->i_lock);
1558 requeue_io(inode, wb);
1559 trace_writeback_sb_inodes_requeue(inode);
1560 continue;
1561 }
1562 spin_unlock(&wb->list_lock);
1563
1564
1565
1566
1567
1568
1569 if (inode->i_state & I_SYNC) {
1570
1571 inode_sleep_on_writeback(inode);
1572
1573 spin_lock(&wb->list_lock);
1574 continue;
1575 }
1576 inode->i_state |= I_SYNC;
1577 wbc_attach_and_unlock_inode(&wbc, inode);
1578
1579 write_chunk = writeback_chunk_size(wb, work);
1580 wbc.nr_to_write = write_chunk;
1581 wbc.pages_skipped = 0;
1582
1583
1584
1585
1586
1587 __writeback_single_inode(inode, &wbc);
1588
1589 wbc_detach_inode(&wbc);
1590 work->nr_pages -= write_chunk - wbc.nr_to_write;
1591 wrote += write_chunk - wbc.nr_to_write;
1592
1593 if (need_resched()) {
1594
1595
1596
1597
1598
1599
1600
1601
1602 blk_flush_plug(current);
1603 cond_resched();
1604 }
1605
1606
1607
1608
1609
1610 tmp_wb = inode_to_wb_and_lock_list(inode);
1611 spin_lock(&inode->i_lock);
1612 if (!(inode->i_state & I_DIRTY_ALL))
1613 wrote++;
1614 requeue_inode(inode, tmp_wb, &wbc);
1615 inode_sync_complete(inode);
1616 spin_unlock(&inode->i_lock);
1617
1618 if (unlikely(tmp_wb != wb)) {
1619 spin_unlock(&tmp_wb->list_lock);
1620 spin_lock(&wb->list_lock);
1621 }
1622
1623
1624
1625
1626
1627 if (wrote) {
1628 if (time_is_before_jiffies(start_time + HZ / 10UL))
1629 break;
1630 if (work->nr_pages <= 0)
1631 break;
1632 }
1633 }
1634 return wrote;
1635}
1636
1637static long __writeback_inodes_wb(struct bdi_writeback *wb,
1638 struct wb_writeback_work *work)
1639{
1640 unsigned long start_time = jiffies;
1641 long wrote = 0;
1642
1643 while (!list_empty(&wb->b_io)) {
1644 struct inode *inode = wb_inode(wb->b_io.prev);
1645 struct super_block *sb = inode->i_sb;
1646
1647 if (!trylock_super(sb)) {
1648
1649
1650
1651
1652
1653 redirty_tail(inode, wb);
1654 continue;
1655 }
1656 wrote += writeback_sb_inodes(sb, wb, work);
1657 up_read(&sb->s_umount);
1658
1659
1660 if (wrote) {
1661 if (time_is_before_jiffies(start_time + HZ / 10UL))
1662 break;
1663 if (work->nr_pages <= 0)
1664 break;
1665 }
1666 }
1667
1668 return wrote;
1669}
1670
1671static long writeback_inodes_wb(struct bdi_writeback *wb, long nr_pages,
1672 enum wb_reason reason)
1673{
1674 struct wb_writeback_work work = {
1675 .nr_pages = nr_pages,
1676 .sync_mode = WB_SYNC_NONE,
1677 .range_cyclic = 1,
1678 .reason = reason,
1679 };
1680 struct blk_plug plug;
1681
1682 blk_start_plug(&plug);
1683 spin_lock(&wb->list_lock);
1684 if (list_empty(&wb->b_io))
1685 queue_io(wb, &work);
1686 __writeback_inodes_wb(wb, &work);
1687 spin_unlock(&wb->list_lock);
1688 blk_finish_plug(&plug);
1689
1690 return nr_pages - work.nr_pages;
1691}
1692
1693
1694
1695
1696
1697
1698
1699
1700
1701
1702
1703
1704
1705
1706
1707
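/*
 * Explicit flushing or periodic writeback of "old" data.
 *
 * Define "old": the first time one of an inode's pages is dirtied, we mark
 * the dirtying-time in the inode's address_space.  So this periodic
 * writeback code just walks the superblock inode list, writing back any
 * inodes which are older than a specific point in time.
 *
 * older_than_this takes precedence over nr_to_write.  So we'll only write
 * back all dirty pages if they are all attached to "old" mappings.
 */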
1708static long wb_writeback(struct bdi_writeback *wb,
1709 struct wb_writeback_work *work)
1710{
1711 unsigned long wb_start = jiffies;
1712 long nr_pages = work->nr_pages;
1713 unsigned long oldest_jif;
1714 struct inode *inode;
1715 long progress;
1716 struct blk_plug plug;
1717
1718 oldest_jif = jiffies;
1719 work->older_than_this = &oldest_jif;
1720
1721 blk_start_plug(&plug);
1722 spin_lock(&wb->list_lock);
1723 for (;;) {
1724
1725
1726
1727 if (work->nr_pages <= 0)
1728 break;
1729
1730
1731
1732
1733
1734
1735
1736 if ((work->for_background || work->for_kupdate) &&
1737 !list_empty(&wb->work_list))
1738 break;
1739
1740
1741
1742
1743
1744 if (work->for_background && !wb_over_bg_thresh(wb))
1745 break;
1746
1747
1748
1749
1750
1751
1752
1753 if (work->for_kupdate) {
1754 oldest_jif = jiffies -
1755 msecs_to_jiffies(dirty_expire_interval * 10);
1756 } else if (work->for_background)
1757 oldest_jif = jiffies;
1758
1759 trace_writeback_start(wb, work);
1760 if (list_empty(&wb->b_io))
1761 queue_io(wb, work);
1762 if (work->sb)
1763 progress = writeback_sb_inodes(work->sb, wb, work);
1764 else
1765 progress = __writeback_inodes_wb(wb, work);
1766 trace_writeback_written(wb, work);
1767
1768 wb_update_bandwidth(wb, wb_start);
1769
1770
1771
1772
1773
1774
1775
1776
1777
1778 if (progress)
1779 continue;
1780
1781
1782
1783 if (list_empty(&wb->b_more_io))
1784 break;
1785
1786
1787
1788
1789
1790 trace_writeback_wait(wb, work);
1791 inode = wb_inode(wb->b_more_io.prev);
1792 spin_lock(&inode->i_lock);
1793 spin_unlock(&wb->list_lock);
1794
1795 inode_sleep_on_writeback(inode);
1796 spin_lock(&wb->list_lock);
1797 }
1798 spin_unlock(&wb->list_lock);
1799 blk_finish_plug(&plug);
1800
1801 return nr_pages - work->nr_pages;
1802}
1803
1804
1805
1806
1807static struct wb_writeback_work *get_next_work_item(struct bdi_writeback *wb)
1808{
1809 struct wb_writeback_work *work = NULL;
1810
1811 spin_lock_bh(&wb->work_lock);
1812 if (!list_empty(&wb->work_list)) {
1813 work = list_entry(wb->work_list.next,
1814 struct wb_writeback_work, list);
1815 list_del_init(&work->list);
1816 }
1817 spin_unlock_bh(&wb->work_lock);
1818 return work;
1819}
1820
1821static long wb_check_background_flush(struct bdi_writeback *wb)
1822{
1823 if (wb_over_bg_thresh(wb)) {
1824
1825 struct wb_writeback_work work = {
1826 .nr_pages = LONG_MAX,
1827 .sync_mode = WB_SYNC_NONE,
1828 .for_background = 1,
1829 .range_cyclic = 1,
1830 .reason = WB_REASON_BACKGROUND,
1831 };
1832
1833 return wb_writeback(wb, &work);
1834 }
1835
1836 return 0;
1837}
1838
1839static long wb_check_old_data_flush(struct bdi_writeback *wb)
1840{
1841 unsigned long expired;
1842 long nr_pages;
1843
1844
1845
1846
1847 if (!dirty_writeback_interval)
1848 return 0;
1849
1850 expired = wb->last_old_flush +
1851 msecs_to_jiffies(dirty_writeback_interval * 10);
1852 if (time_before(jiffies, expired))
1853 return 0;
1854
1855 wb->last_old_flush = jiffies;
1856 nr_pages = get_nr_dirty_pages();
1857
1858 if (nr_pages) {
1859 struct wb_writeback_work work = {
1860 .nr_pages = nr_pages,
1861 .sync_mode = WB_SYNC_NONE,
1862 .for_kupdate = 1,
1863 .range_cyclic = 1,
1864 .reason = WB_REASON_PERIODIC,
1865 };
1866
1867 return wb_writeback(wb, &work);
1868 }
1869
1870 return 0;
1871}
1872
1873static long wb_check_start_all(struct bdi_writeback *wb)
1874{
1875 long nr_pages;
1876
1877 if (!test_bit(WB_start_all, &wb->state))
1878 return 0;
1879
1880 nr_pages = get_nr_dirty_pages();
1881 if (nr_pages) {
1882 struct wb_writeback_work work = {
1883 .nr_pages = wb_split_bdi_pages(wb, nr_pages),
1884 .sync_mode = WB_SYNC_NONE,
1885 .range_cyclic = 1,
1886 .reason = wb->start_all_reason,
1887 };
1888
1889 nr_pages = wb_writeback(wb, &work);
1890 }
1891
1892 clear_bit(WB_start_all, &wb->state);
1893 return nr_pages;
1894}
1895
1896
1897
1898
1899
1900static long wb_do_writeback(struct bdi_writeback *wb)
1901{
1902 struct wb_writeback_work *work;
1903 long wrote = 0;
1904
1905 set_bit(WB_writeback_running, &wb->state);
1906 while ((work = get_next_work_item(wb)) != NULL) {
1907 trace_writeback_exec(wb, work);
1908 wrote += wb_writeback(wb, work);
1909 finish_writeback_work(wb, work);
1910 }
1911
1912
1913
1914
1915 wrote += wb_check_start_all(wb);
1916
1917
1918
1919
1920 wrote += wb_check_old_data_flush(wb);
1921 wrote += wb_check_background_flush(wb);
1922 clear_bit(WB_writeback_running, &wb->state);
1923
1924 return wrote;
1925}
1926
1927
1928
1929
1930
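/*
 * Handle writeback of dirty data for the device backed by this bdi.  Also
 * reschedules periodically and does kupdated style flushing.
 */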
1931void wb_workfn(struct work_struct *work)
1932{
1933 struct bdi_writeback *wb = container_of(to_delayed_work(work),
1934 struct bdi_writeback, dwork);
1935 long pages_written;
1936
1937 set_worker_desc("flush-%s", dev_name(wb->bdi->dev));
1938 current->flags |= PF_SWAPWRITE;
1939
1940 if (likely(!current_is_workqueue_rescuer() ||
1941 !test_bit(WB_registered, &wb->state))) {
1942
1943
1944
1945
1946
1947
1948 do {
1949 pages_written = wb_do_writeback(wb);
1950 trace_writeback_pages_written(pages_written);
1951 } while (!list_empty(&wb->work_list));
1952 } else {
1953
1954
1955
1956
1957
1958 pages_written = writeback_inodes_wb(wb, 1024,
1959 WB_REASON_FORKER_THREAD);
1960 trace_writeback_pages_written(pages_written);
1961 }
1962
1963 if (!list_empty(&wb->work_list))
1964 wb_wakeup(wb);
1965 else if (wb_has_dirty_io(wb) && dirty_writeback_interval)
1966 wb_wakeup_delayed(wb);
1967
1968 current->flags &= ~PF_SWAPWRITE;
1969}
1970
1971
1972
1973
1974
1975static void __wakeup_flusher_threads_bdi(struct backing_dev_info *bdi,
1976 enum wb_reason reason)
1977{
1978 struct bdi_writeback *wb;
1979
1980 if (!bdi_has_dirty_io(bdi))
1981 return;
1982
1983 list_for_each_entry_rcu(wb, &bdi->wb_list, bdi_node)
1984 wb_start_writeback(wb, reason);
1985}
1986
1987void wakeup_flusher_threads_bdi(struct backing_dev_info *bdi,
1988 enum wb_reason reason)
1989{
1990 rcu_read_lock();
1991 __wakeup_flusher_threads_bdi(bdi, reason);
1992 rcu_read_unlock();
1993}
1994
1995
1996
1997
1998void wakeup_flusher_threads(enum wb_reason reason)
1999{
2000 struct backing_dev_info *bdi;
2001
2002
2003
2004
2005 if (blk_needs_flush_plug(current))
2006 blk_schedule_flush_plug(current);
2007
2008 rcu_read_lock();
2009 list_for_each_entry_rcu(bdi, &bdi_list, bdi_list)
2010 __wakeup_flusher_threads_bdi(bdi, reason);
2011 rcu_read_unlock();
2012}
2013
2014
2015
2016
2017
2018
2019
2020
2021
2022
2023
2024
2025
2026
2027
2028
2029static void wakeup_dirtytime_writeback(struct work_struct *w);
2030static DECLARE_DELAYED_WORK(dirtytime_work, wakeup_dirtytime_writeback);
2031
2032static void wakeup_dirtytime_writeback(struct work_struct *w)
2033{
2034 struct backing_dev_info *bdi;
2035
2036 rcu_read_lock();
2037 list_for_each_entry_rcu(bdi, &bdi_list, bdi_list) {
2038 struct bdi_writeback *wb;
2039
2040 list_for_each_entry_rcu(wb, &bdi->wb_list, bdi_node)
2041 if (!list_empty(&wb->b_dirty_time))
2042 wb_wakeup(wb);
2043 }
2044 rcu_read_unlock();
2045 schedule_delayed_work(&dirtytime_work, dirtytime_expire_interval * HZ);
2046}
2047
2048static int __init start_dirtytime_writeback(void)
2049{
2050 schedule_delayed_work(&dirtytime_work, dirtytime_expire_interval * HZ);
2051 return 0;
2052}
2053__initcall(start_dirtytime_writeback);
2054
2055int dirtytime_interval_handler(struct ctl_table *table, int write,
2056 void __user *buffer, size_t *lenp, loff_t *ppos)
2057{
2058 int ret;
2059
2060 ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
2061 if (ret == 0 && write)
2062 mod_delayed_work(system_wq, &dirtytime_work, 0);
2063 return ret;
2064}
2065
2066static noinline void block_dump___mark_inode_dirty(struct inode *inode)
2067{
2068 if (inode->i_ino || strcmp(inode->i_sb->s_id, "bdev")) {
2069 struct dentry *dentry;
2070 const char *name = "?";
2071
2072 dentry = d_find_alias(inode);
2073 if (dentry) {
2074 spin_lock(&dentry->d_lock);
2075 name = (const char *) dentry->d_name.name;
2076 }
2077 printk(KERN_DEBUG
2078 "%s(%d): dirtied inode %lu (%s) on %s\n",
2079 current->comm, task_pid_nr(current), inode->i_ino,
2080 name, inode->i_sb->s_id);
2081 if (dentry) {
2082 spin_unlock(&dentry->d_lock);
2083 dput(dentry);
2084 }
2085 }
2086}
2087
/**
 * __mark_inode_dirty - internal function
 * @inode: inode to mark
 * @flags: what kind of dirty (i.e. I_DIRTY_SYNC)
 *
 * Mark an inode as dirty.  Callers should use mark_inode_dirty() or
 * mark_inode_dirty_sync().
 *
 * Put the inode on the super block's dirty list.
 *
 * CAREFUL!  We mark it dirty unconditionally, but move it onto the
 * dirty list only if it is hashed or if it refers to a blockdev.
 * If it was not hashed, it will never be added to the dirty list
 * even if it is later hashed, as it will have been marked dirty already.
 *
 * In short, make sure you hash any inodes _before_ you start marking
 * them dirty.
 *
 * Note that for blockdevs, inode->dirtied_when represents the dirtying time
 * of the block-special inode (/dev/hda1) itself.  And the ->dirtied_when
 * field of the kernel-internal blockdev inode represents the dirtying time
 * of the blockdev's pages.  This is why for I_DIRTY_PAGES we always use
 * page->mapping->host, so the page-dirtying time is recorded in the
 * internal blockdev inode.
 */
2114void __mark_inode_dirty(struct inode *inode, int flags)
2115{
2116 struct super_block *sb = inode->i_sb;
2117 int dirtytime;
2118
2119 trace_writeback_mark_inode_dirty(inode, flags);
2120
2121
2122
2123
2124
2125 if (flags & (I_DIRTY_INODE | I_DIRTY_TIME)) {
2126 trace_writeback_dirty_inode_start(inode, flags);
2127
2128 if (sb->s_op->dirty_inode)
2129 sb->s_op->dirty_inode(inode, flags);
2130
2131 trace_writeback_dirty_inode(inode, flags);
2132 }
2133 if (flags & I_DIRTY_INODE)
2134 flags &= ~I_DIRTY_TIME;
2135 dirtytime = flags & I_DIRTY_TIME;
2136
2137
2138
2139
2140
2141 smp_mb();
2142
2143 if (((inode->i_state & flags) == flags) ||
2144 (dirtytime && (inode->i_state & I_DIRTY_INODE)))
2145 return;
2146
2147 if (unlikely(block_dump))
2148 block_dump___mark_inode_dirty(inode);
2149
2150 spin_lock(&inode->i_lock);
2151 if (dirtytime && (inode->i_state & I_DIRTY_INODE))
2152 goto out_unlock_inode;
2153 if ((inode->i_state & flags) != flags) {
2154 const int was_dirty = inode->i_state & I_DIRTY;
2155
2156 inode_attach_wb(inode, NULL);
2157
2158 if (flags & I_DIRTY_INODE)
2159 inode->i_state &= ~I_DIRTY_TIME;
2160 inode->i_state |= flags;
2161
2162
2163
2164
2165
2166
2167 if (inode->i_state & I_SYNC)
2168 goto out_unlock_inode;
2169
2170
2171
2172
2173
2174 if (!S_ISBLK(inode->i_mode)) {
2175 if (inode_unhashed(inode))
2176 goto out_unlock_inode;
2177 }
2178 if (inode->i_state & I_FREEING)
2179 goto out_unlock_inode;
2180
2181
2182
2183
2184
2185 if (!was_dirty) {
2186 struct bdi_writeback *wb;
2187 struct list_head *dirty_list;
2188 bool wakeup_bdi = false;
2189
2190 wb = locked_inode_to_wb_and_lock_list(inode);
2191
2192 WARN(bdi_cap_writeback_dirty(wb->bdi) &&
2193 !test_bit(WB_registered, &wb->state),
2194 "bdi-%s not registered\n", wb->bdi->name);
2195
2196 inode->dirtied_when = jiffies;
2197 if (dirtytime)
2198 inode->dirtied_time_when = jiffies;
2199
2200 if (inode->i_state & I_DIRTY)
2201 dirty_list = &wb->b_dirty;
2202 else
2203 dirty_list = &wb->b_dirty_time;
2204
2205 wakeup_bdi = inode_io_list_move_locked(inode, wb,
2206 dirty_list);
2207
2208 spin_unlock(&wb->list_lock);
2209 trace_writeback_dirty_inode_enqueue(inode);
2210
2211
2212
2213
2214
2215
2216
2217 if (bdi_cap_writeback_dirty(wb->bdi) && wakeup_bdi)
2218 wb_wakeup_delayed(wb);
2219 return;
2220 }
2221 }
2222out_unlock_inode:
2223 spin_unlock(&inode->i_lock);
2224}
2225EXPORT_SYMBOL(__mark_inode_dirty);
2226
2227
2228
2229
2230
2231
2232
2233
2234
2235
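/*
 * The @s_sync_lock is used to serialise concurrent sync operations
 * to avoid lock contention problems with concurrent wait_sb_inodes() calls.
 * Concurrent callers will block on the s_sync_lock rather than doing
 * contending walks.  The queueing maintains sync(2) required behaviour:
 * all the IO that has been issued up to the time this function is entered
 * is guaranteed to be completed by the time we have gained the lock and
 * waited for all IO that is in progress, regardless of the order callers
 * are granted the lock.
 */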
2236static void wait_sb_inodes(struct super_block *sb)
2237{
2238 LIST_HEAD(sync_list);
2239
2240
2241
2242
2243
2244 WARN_ON(!rwsem_is_locked(&sb->s_umount));
2245
2246 mutex_lock(&sb->s_sync_lock);
2247
2248
2249
2250
2251
2252
2253
2254
2255
2256
2257 rcu_read_lock();
2258 spin_lock_irq(&sb->s_inode_wblist_lock);
2259 list_splice_init(&sb->s_inodes_wb, &sync_list);
2260
2261
2262
2263
2264
2265
2266
2267
2268 while (!list_empty(&sync_list)) {
2269 struct inode *inode = list_first_entry(&sync_list, struct inode,
2270 i_wb_list);
2271 struct address_space *mapping = inode->i_mapping;
2272
2273
2274
2275
2276
2277
2278
2279 list_move_tail(&inode->i_wb_list, &sb->s_inodes_wb);
2280
2281
2282
2283
2284
2285
2286 if (!mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK))
2287 continue;
2288
2289 spin_unlock_irq(&sb->s_inode_wblist_lock);
2290
2291 spin_lock(&inode->i_lock);
2292 if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) {
2293 spin_unlock(&inode->i_lock);
2294
2295 spin_lock_irq(&sb->s_inode_wblist_lock);
2296 continue;
2297 }
2298 __iget(inode);
2299 spin_unlock(&inode->i_lock);
2300 rcu_read_unlock();
2301
2302
2303
2304
2305
2306
2307 filemap_fdatawait_keep_errors(mapping);
2308
2309 cond_resched();
2310
2311 iput(inode);
2312
2313 rcu_read_lock();
2314 spin_lock_irq(&sb->s_inode_wblist_lock);
2315 }
2316 spin_unlock_irq(&sb->s_inode_wblist_lock);
2317 rcu_read_unlock();
2318 mutex_unlock(&sb->s_sync_lock);
2319}
2320
2321static void __writeback_inodes_sb_nr(struct super_block *sb, unsigned long nr,
2322 enum wb_reason reason, bool skip_if_busy)
2323{
2324 DEFINE_WB_COMPLETION_ONSTACK(done);
2325 struct wb_writeback_work work = {
2326 .sb = sb,
2327 .sync_mode = WB_SYNC_NONE,
2328 .tagged_writepages = 1,
2329 .done = &done,
2330 .nr_pages = nr,
2331 .reason = reason,
2332 };
2333 struct backing_dev_info *bdi = sb->s_bdi;
2334
2335 if (!bdi_has_dirty_io(bdi) || bdi == &noop_backing_dev_info)
2336 return;
2337 WARN_ON(!rwsem_is_locked(&sb->s_umount));
2338
2339 bdi_split_work_to_wbs(sb->s_bdi, &work, skip_if_busy);
2340 wb_wait_for_completion(bdi, &done);
2341}
2342
/**
 * writeback_inodes_sb_nr - writeback dirty inodes from given super_block
 * @sb: the superblock
 * @nr: the number of pages to write
 * @reason: reason why some writeback work was initiated
 *
 * Start writeback on some inodes on this super_block.  No guarantees are
 * made on how many (if any) will be written, and this function does not
 * wait for IO completion of submitted IO.
 */
2353void writeback_inodes_sb_nr(struct super_block *sb,
2354 unsigned long nr,
2355 enum wb_reason reason)
2356{
2357 __writeback_inodes_sb_nr(sb, nr, reason, false);
2358}
2359EXPORT_SYMBOL(writeback_inodes_sb_nr);
2360
/**
 * writeback_inodes_sb - writeback dirty inodes from given super_block
 * @sb: the superblock
 * @reason: reason why some writeback work was initiated
 *
 * Start writeback on some inodes on this super_block.  No guarantees are
 * made on how many (if any) will be written, and this function does not
 * wait for IO completion of submitted IO.
 */
2370void writeback_inodes_sb(struct super_block *sb, enum wb_reason reason)
2371{
2372 return writeback_inodes_sb_nr(sb, get_nr_dirty_pages(), reason);
2373}
2374EXPORT_SYMBOL(writeback_inodes_sb);
2375
/**
 * try_to_writeback_inodes_sb - try to start writeback if none underway
 * @sb: the superblock
 * @reason: reason why some writeback work was initiated
 *
 * Invoke __writeback_inodes_sb_nr() if no writeback is currently underway.
 */
2383void try_to_writeback_inodes_sb(struct super_block *sb, enum wb_reason reason)
2384{
2385 if (!down_read_trylock(&sb->s_umount))
2386 return;
2387
2388 __writeback_inodes_sb_nr(sb, get_nr_dirty_pages(), reason, true);
2389 up_read(&sb->s_umount);
2390}
2391EXPORT_SYMBOL(try_to_writeback_inodes_sb);
2392
2393
2394
2395
2396
2397
2398
2399
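/**
 * sync_inodes_sb - sync sb inode pages
 * @sb: the superblock
 *
 * This function writes and waits on any dirty inode belonging to this
 * super_block.
 */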
2400void sync_inodes_sb(struct super_block *sb)
2401{
2402 DEFINE_WB_COMPLETION_ONSTACK(done);
2403 struct wb_writeback_work work = {
2404 .sb = sb,
2405 .sync_mode = WB_SYNC_ALL,
2406 .nr_pages = LONG_MAX,
2407 .range_cyclic = 0,
2408 .done = &done,
2409 .reason = WB_REASON_SYNC,
2410 .for_sync = 1,
2411 };
2412 struct backing_dev_info *bdi = sb->s_bdi;
2413
2414
2415
2416
2417
2418
2419 if (bdi == &noop_backing_dev_info)
2420 return;
2421 WARN_ON(!rwsem_is_locked(&sb->s_umount));
2422
2423 bdi_split_work_to_wbs(bdi, &work, false);
2424 wb_wait_for_completion(bdi, &done);
2425
2426 wait_sb_inodes(sb);
2427}
2428EXPORT_SYMBOL(sync_inodes_sb);
2429
2430
2431
2432
2433
2434
2435
2436
2437
2438
2439
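/**
 * write_inode_now - write an inode to disk
 * @inode: inode to write to disk
 * @sync: whether the write should be synchronous or not
 *
 * This function commits an inode to disk immediately if it is dirty.  This
 * is primarily needed by knfsd.
 *
 * The caller must either have a ref on the inode or must have set
 * I_WILL_FREE.
 */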
2440int write_inode_now(struct inode *inode, int sync)
2441{
2442 struct writeback_control wbc = {
2443 .nr_to_write = LONG_MAX,
2444 .sync_mode = sync ? WB_SYNC_ALL : WB_SYNC_NONE,
2445 .range_start = 0,
2446 .range_end = LLONG_MAX,
2447 };
2448
2449 if (!mapping_cap_writeback_dirty(inode->i_mapping))
2450 wbc.nr_to_write = 0;
2451
2452 might_sleep();
2453 return writeback_single_inode(inode, &wbc);
2454}
2455EXPORT_SYMBOL(write_inode_now);
2456
/**
 * sync_inode - write an inode and its pages to disk
 * @inode: the inode to sync
 * @wbc: controls the writeback mode
 *
 * sync_inode() will write an inode and its pages to disk.  It will also
 * correctly update the inode on its superblock's dirty inode lists and will
 * update inode->i_state.
 *
 * The caller must have a ref on the inode.
 */
2468int sync_inode(struct inode *inode, struct writeback_control *wbc)
2469{
2470 return writeback_single_inode(inode, wbc);
2471}
2472EXPORT_SYMBOL(sync_inode);
2473
/**
 * sync_inode_metadata - write an inode to disk
 * @inode: the inode to sync
 * @wait: wait for I/O to complete
 *
 * Write an inode to disk and adjust its dirty state after completion.
 *
 * Note: only writes the actual inode, no associated data or other metadata.
 */
2483int sync_inode_metadata(struct inode *inode, int wait)
2484{
2485 struct writeback_control wbc = {
2486 .sync_mode = wait ? WB_SYNC_ALL : WB_SYNC_NONE,
2487 .nr_to_write = 0,
2488 };
2489
2490 return sync_inode(inode, &wbc);
2491}
2492EXPORT_SYMBOL(sync_inode_metadata);
2493