/*
 * fs/fs-writeback.c
 *
 * Copyright (C) 2002, Linus Torvalds.
 *
 * Contains all the functions related to writing back and waiting
 * upon dirty inodes against superblocks, and writing back dirty
 * pages against inodes.  ie: data writeback.  Writeout of the
 * inode itself is not handled here.
 *
 * 10Apr2002	Andrew Morton
 *		Split out of fs/inode.c
 *		Additions for address_space-based writeback
 */

#include <linux/kernel.h>
#include <linux/export.h>
#include <linux/spinlock.h>
#include <linux/slab.h>
#include <linux/sched.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/kthread.h>
#include <linux/writeback.h>
#include <linux/blkdev.h>
#include <linux/backing-dev.h>
#include <linux/tracepoint.h>
#include <linux/device.h>
#include <linux/memcontrol.h>
#include "internal.h"

/*
 * 4MB minimal write chunk size
 */
#define MIN_WRITEBACK_PAGES	(4096UL >> (PAGE_SHIFT - 10))

struct wb_completion {
	atomic_t		cnt;
};

/*
 * Passed into wb_writeback(), essentially a copy of the writeback
 * descriptor handed to the flusher; describes what to write and how.
 */
struct wb_writeback_work {
	long nr_pages;
	struct super_block *sb;
	unsigned long *older_than_this;
	enum writeback_sync_modes sync_mode;
	unsigned int tagged_writepages:1;
	unsigned int for_kupdate:1;
	unsigned int range_cyclic:1;
	unsigned int for_background:1;
	unsigned int for_sync:1;	/* sync(2) WB_SYNC_ALL writeback */
	unsigned int auto_free:1;	/* free on completion */
	enum wb_reason reason;		/* why was writeback initiated? */

	struct list_head list;		/* pending work list */
	struct wb_completion *done;	/* set if the caller waits */
};

/*
 * If one wants to wait for one or more wb_writeback_works, each work's
 * ->done should point to a wb_completion defined using the following
 * macro.  Once all work items are issued with wb_queue_work(), the caller
 * can wait for the completion of all using wb_wait_for_completion().  Work
 * items which are waited upon aren't freed automatically on completion.
 */
#define DEFINE_WB_COMPLETION_ONSTACK(cmpl)				\
	struct wb_completion cmpl = {					\
		.cnt		= ATOMIC_INIT(1),			\
	}

/*
 * If an inode is constantly having its pages dirtied, but then the
 * updates stop dirtytime_expire_interval seconds in the past, it's
 * possible for the worst case time between when an inode has its
 * timestamps updated and when they finally get written out to be two
 * dirtytime_expire_intervals.  We set the default to 12 hours (in
 * seconds), which means most of the time inodes will have their
 * timestamps written to disk after 12 hours, but in the worst case a
 * few inodes might not have their timestamps updated for 24 hours.
 */
unsigned int dirtytime_expire_interval = 12 * 60 * 60;

static inline struct inode *wb_inode(struct list_head *head)
{
	return list_entry(head, struct inode, i_io_list);
}

/*
 * Include the creation of the trace points after defining the
 * wb_writeback_work structure and inline functions so that the definition
 * remains local to this file.
 */
#define CREATE_TRACE_POINTS
#include <trace/events/writeback.h>

EXPORT_TRACEPOINT_SYMBOL_GPL(wbc_writepage);

static bool wb_io_lists_populated(struct bdi_writeback *wb)
{
	if (wb_has_dirty_io(wb)) {
		return false;
	} else {
		set_bit(WB_has_dirty_io, &wb->state);
		WARN_ON_ONCE(!wb->avg_write_bandwidth);
		atomic_long_add(wb->avg_write_bandwidth,
				&wb->bdi->tot_write_bandwidth);
		return true;
	}
}

static void wb_io_lists_depopulated(struct bdi_writeback *wb)
{
	if (wb_has_dirty_io(wb) && list_empty(&wb->b_dirty) &&
	    list_empty(&wb->b_io) && list_empty(&wb->b_more_io)) {
		clear_bit(WB_has_dirty_io, &wb->state);
		WARN_ON_ONCE(atomic_long_sub_return(wb->avg_write_bandwidth,
					&wb->bdi->tot_write_bandwidth) < 0);
	}
}

/**
 * inode_io_list_move_locked - move an inode onto a bdi_writeback IO list
 * @inode: inode to be moved
 * @wb: target bdi_writeback
 * @head: one of @wb->b_{dirty|io|more_io}
 *
 * Move @inode->i_io_list to @head of @wb and set %WB_has_dirty_io.
 * Returns %true if @inode is the first occupant of the !dirty_time IO
 * lists; otherwise, %false.
 */
135static bool inode_io_list_move_locked(struct inode *inode,
136 struct bdi_writeback *wb,
137 struct list_head *head)
138{
139 assert_spin_locked(&wb->list_lock);
140
141 list_move(&inode->i_io_list, head);
142
143
144 if (head != &wb->b_dirty_time)
145 return wb_io_lists_populated(wb);
146
147 wb_io_lists_depopulated(wb);
148 return false;
149}

/**
 * inode_io_list_del_locked - remove an inode from its bdi_writeback IO list
 * @inode: inode to be removed
 * @wb: bdi_writeback @inode is being removed from
 *
 * Remove @inode which may be on one of @wb->b_{dirty|io|more_io} lists and
 * clear %WB_has_dirty_io if all IO lists are empty afterwards.
 */
159static void inode_io_list_del_locked(struct inode *inode,
160 struct bdi_writeback *wb)
161{
162 assert_spin_locked(&wb->list_lock);
163
164 list_del_init(&inode->i_io_list);
165 wb_io_lists_depopulated(wb);
166}
167
168static void wb_wakeup(struct bdi_writeback *wb)
169{
170 spin_lock_bh(&wb->work_lock);
171 if (test_bit(WB_registered, &wb->state))
172 mod_delayed_work(bdi_wq, &wb->dwork, 0);
173 spin_unlock_bh(&wb->work_lock);
174}
175
176static void wb_queue_work(struct bdi_writeback *wb,
177 struct wb_writeback_work *work)
178{
179 trace_writeback_queue(wb, work);
180
181 spin_lock_bh(&wb->work_lock);
182 if (!test_bit(WB_registered, &wb->state))
183 goto out_unlock;
184 if (work->done)
185 atomic_inc(&work->done->cnt);
186 list_add_tail(&work->list, &wb->work_list);
187 mod_delayed_work(bdi_wq, &wb->dwork, 0);
188out_unlock:
189 spin_unlock_bh(&wb->work_lock);
190}

/**
 * wb_wait_for_completion - wait for completion of bdi_writeback_works
 * @bdi: bdi work items were issued to
 * @done: target wb_completion
 *
 * Wait for one or more work items issued to @bdi with their ->done field
 * set to @done, which should have been defined with
 * DEFINE_WB_COMPLETION_ONSTACK().  This function returns after all such
 * work items are completed.  Work items which are waited upon aren't freed
 * automatically on completion.
 */
203static void wb_wait_for_completion(struct backing_dev_info *bdi,
204 struct wb_completion *done)
205{
206 atomic_dec(&done->cnt);
207 wait_event(bdi->wb_waitq, !atomic_read(&done->cnt));
208}
209
210#ifdef CONFIG_CGROUP_WRITEBACK

/* parameters for foreign inode detection, see wbc_detach_inode() */
213#define WB_FRN_TIME_SHIFT 13
214#define WB_FRN_TIME_AVG_SHIFT 3
215#define WB_FRN_TIME_CUT_DIV 2
216#define WB_FRN_TIME_PERIOD (2 * (1 << WB_FRN_TIME_SHIFT))
217
218#define WB_FRN_HIST_SLOTS 16
219#define WB_FRN_HIST_UNIT (WB_FRN_TIME_PERIOD / WB_FRN_HIST_SLOTS)
220
221#define WB_FRN_HIST_THR_SLOTS (WB_FRN_HIST_SLOTS / 2)
222
223#define WB_FRN_HIST_MAX_SLOTS (WB_FRN_HIST_THR_SLOTS / 2 + 1)
224
225
226static atomic_t isw_nr_in_flight = ATOMIC_INIT(0);
227static struct workqueue_struct *isw_wq;
228
229void __inode_attach_wb(struct inode *inode, struct page *page)
230{
231 struct backing_dev_info *bdi = inode_to_bdi(inode);
232 struct bdi_writeback *wb = NULL;
233
234 if (inode_cgwb_enabled(inode)) {
235 struct cgroup_subsys_state *memcg_css;
236
237 if (page) {
238 memcg_css = mem_cgroup_css_from_page(page);
239 wb = wb_get_create(bdi, memcg_css, GFP_ATOMIC);
240 } else {
241
242 memcg_css = task_get_css(current, memory_cgrp_id);
243 wb = wb_get_create(bdi, memcg_css, GFP_ATOMIC);
244 css_put(memcg_css);
245 }
246 }
247
248 if (!wb)
249 wb = &bdi->wb;
250
251
252
253
254
255 if (unlikely(cmpxchg(&inode->i_wb, NULL, wb)))
256 wb_put(wb);
257}

/**
 * locked_inode_to_wb_and_lock_list - determine a locked inode's wb and lock it
 * @inode: inode of interest with i_lock held
 *
 * Returns @inode's wb with its list_lock held.  @inode->i_lock must be
 * held on entry and is released on return.  The returned wb is guaranteed
 * to stay @inode's associated wb until its list_lock is released.
 */
267static struct bdi_writeback *
268locked_inode_to_wb_and_lock_list(struct inode *inode)
269 __releases(&inode->i_lock)
270 __acquires(&wb->list_lock)
271{
272 while (true) {
273 struct bdi_writeback *wb = inode_to_wb(inode);
274
275
276
277
278
279
280
281 wb_get(wb);
282 spin_unlock(&inode->i_lock);
283 spin_lock(&wb->list_lock);
284
285
286 if (likely(wb == inode->i_wb)) {
287 wb_put(wb);
288 return wb;
289 }
290
291 spin_unlock(&wb->list_lock);
292 wb_put(wb);
293 cpu_relax();
294 spin_lock(&inode->i_lock);
295 }
296}

/**
 * inode_to_wb_and_lock_list - determine an inode's wb and lock it
 * @inode: inode of interest
 *
 * Same as locked_inode_to_wb_and_lock_list() but @inode->i_lock isn't held
 * on entry.
 */
305static struct bdi_writeback *inode_to_wb_and_lock_list(struct inode *inode)
306 __acquires(&wb->list_lock)
307{
308 spin_lock(&inode->i_lock);
309 return locked_inode_to_wb_and_lock_list(inode);
310}
311
312struct inode_switch_wbs_context {
313 struct inode *inode;
314 struct bdi_writeback *new_wb;
315
316 struct rcu_head rcu_head;
317 struct work_struct work;
318};
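
/*
 * Work item that carries out the actual switch of an inode from its old
 * wb to isw->new_wb: dirty and writeback page stats are transferred and
 * the inode is moved onto the matching IO list of the new wb.
 */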
320static void inode_switch_wbs_work_fn(struct work_struct *work)
321{
322 struct inode_switch_wbs_context *isw =
323 container_of(work, struct inode_switch_wbs_context, work);
324 struct inode *inode = isw->inode;
325 struct address_space *mapping = inode->i_mapping;
326 struct bdi_writeback *old_wb = inode->i_wb;
327 struct bdi_writeback *new_wb = isw->new_wb;
328 struct radix_tree_iter iter;
329 bool switched = false;
330 void **slot;

	/*
	 * By the time control reaches here, an RCU grace period has passed
	 * since I_WB_SWITCH was asserted, so all wb stat update transactions
	 * between unlocked_inode_to_wb_begin/end() are guaranteed to be
	 * visible.
	 *
	 * Grab the old and new wb's list_lock's in address order to avoid
	 * ABBA deadlock; i_lock and tree_lock nest inside them.
	 */
342 if (old_wb < new_wb) {
343 spin_lock(&old_wb->list_lock);
344 spin_lock_nested(&new_wb->list_lock, SINGLE_DEPTH_NESTING);
345 } else {
346 spin_lock(&new_wb->list_lock);
347 spin_lock_nested(&old_wb->list_lock, SINGLE_DEPTH_NESTING);
348 }
349 spin_lock(&inode->i_lock);
350 spin_lock_irq(&mapping->tree_lock);
351
352
353
354
355
356 if (unlikely(inode->i_state & I_FREEING))
357 goto skip_switch;
358
359
360
361
362
363
364 radix_tree_for_each_tagged(slot, &mapping->page_tree, &iter, 0,
365 PAGECACHE_TAG_DIRTY) {
366 struct page *page = radix_tree_deref_slot_protected(slot,
367 &mapping->tree_lock);
368 if (likely(page) && PageDirty(page)) {
369 __dec_wb_stat(old_wb, WB_RECLAIMABLE);
370 __inc_wb_stat(new_wb, WB_RECLAIMABLE);
371 }
372 }
373
374 radix_tree_for_each_tagged(slot, &mapping->page_tree, &iter, 0,
375 PAGECACHE_TAG_WRITEBACK) {
376 struct page *page = radix_tree_deref_slot_protected(slot,
377 &mapping->tree_lock);
378 if (likely(page)) {
379 WARN_ON_ONCE(!PageWriteback(page));
380 __dec_wb_stat(old_wb, WB_WRITEBACK);
381 __inc_wb_stat(new_wb, WB_WRITEBACK);
382 }
383 }
384
385 wb_get(new_wb);
386
387
388
389
390
391
392
393 if (!list_empty(&inode->i_io_list)) {
394 struct inode *pos;
395
396 inode_io_list_del_locked(inode, old_wb);
397 inode->i_wb = new_wb;
398 list_for_each_entry(pos, &new_wb->b_dirty, i_io_list)
399 if (time_after_eq(inode->dirtied_when,
400 pos->dirtied_when))
401 break;
402 inode_io_list_move_locked(inode, new_wb, pos->i_io_list.prev);
403 } else {
404 inode->i_wb = new_wb;
405 }
406
407
408 inode->i_wb_frn_winner = 0;
409 inode->i_wb_frn_avg_time = 0;
410 inode->i_wb_frn_history = 0;
411 switched = true;
412skip_switch:
413
414
415
416
417 smp_store_release(&inode->i_state, inode->i_state & ~I_WB_SWITCH);
418
419 spin_unlock_irq(&mapping->tree_lock);
420 spin_unlock(&inode->i_lock);
421 spin_unlock(&new_wb->list_lock);
422 spin_unlock(&old_wb->list_lock);
423
424 if (switched) {
425 wb_wakeup(new_wb);
426 wb_put(old_wb);
427 }
428 wb_put(new_wb);
429
430 iput(inode);
431 kfree(isw);
432
433 atomic_dec(&isw_nr_in_flight);
434}
435
436static void inode_switch_wbs_rcu_fn(struct rcu_head *rcu_head)
437{
438 struct inode_switch_wbs_context *isw = container_of(rcu_head,
439 struct inode_switch_wbs_context, rcu_head);
440
441
442 INIT_WORK(&isw->work, inode_switch_wbs_work_fn);
443 queue_work(isw_wq, &isw->work);
444}

/**
 * inode_switch_wbs - change the wb association of an inode
 * @inode: target inode
 * @new_wb_id: ID of the new wb
 *
 * Switch @inode's wb association to the wb identified by @new_wb_id.  The
 * switching is performed asynchronously and may fail silently.
 */
454static void inode_switch_wbs(struct inode *inode, int new_wb_id)
455{
456 struct backing_dev_info *bdi = inode_to_bdi(inode);
457 struct cgroup_subsys_state *memcg_css;
458 struct inode_switch_wbs_context *isw;
459
460
461 if (inode->i_state & I_WB_SWITCH)
462 return;
463
464 isw = kzalloc(sizeof(*isw), GFP_ATOMIC);
465 if (!isw)
466 return;
467
468
469 rcu_read_lock();
470 memcg_css = css_from_id(new_wb_id, &memory_cgrp_subsys);
471 if (memcg_css)
472 isw->new_wb = wb_get_create(bdi, memcg_css, GFP_ATOMIC);
473 rcu_read_unlock();
474 if (!isw->new_wb)
475 goto out_free;
476
477
478 spin_lock(&inode->i_lock);
479 if (!(inode->i_sb->s_flags & MS_ACTIVE) ||
480 inode->i_state & (I_WB_SWITCH | I_FREEING) ||
481 inode_to_wb(inode) == isw->new_wb) {
482 spin_unlock(&inode->i_lock);
483 goto out_free;
484 }
485 inode->i_state |= I_WB_SWITCH;
486 spin_unlock(&inode->i_lock);
487
488 ihold(inode);
489 isw->inode = inode;
490
491 atomic_inc(&isw_nr_in_flight);
492
493
494
495
496
497
498
499 call_rcu(&isw->rcu_head, inode_switch_wbs_rcu_fn);
500 return;
501
502out_free:
503 if (isw->new_wb)
504 wb_put(isw->new_wb);
505 kfree(isw);
506}

/**
 * wbc_attach_and_unlock_inode - associate wbc with target inode and unlock it
 * @wbc: writeback_control of interest
 * @inode: target inode
 *
 * @inode is locked and about to be written back under the control of @wbc.
 * Record @inode's writeback context into @wbc and unlock the i_lock.  On
 * writeback completion, wbc_detach_inode() should be called to undo this.
 */
518void wbc_attach_and_unlock_inode(struct writeback_control *wbc,
519 struct inode *inode)
520{
521 if (!inode_cgwb_enabled(inode)) {
522 spin_unlock(&inode->i_lock);
523 return;
524 }
525
526 wbc->wb = inode_to_wb(inode);
527 wbc->inode = inode;
528
529 wbc->wb_id = wbc->wb->memcg_css->id;
530 wbc->wb_lcand_id = inode->i_wb_frn_winner;
531 wbc->wb_tcand_id = 0;
532 wbc->wb_bytes = 0;
533 wbc->wb_lcand_bytes = 0;
534 wbc->wb_tcand_bytes = 0;
535
536 wb_get(wbc->wb);
537 spin_unlock(&inode->i_lock);
538
539
540
541
542
543 if (unlikely(wb_dying(wbc->wb)))
544 inode_switch_wbs(inode, wbc->wb_id);
545}

/**
 * wbc_detach_inode - disassociate wbc from inode and perform foreign detection
 * @wbc: writeback_control of the just finished writeback
 *
 * To be called after a writeback attempt of an inode finishes and undoes
 * wbc_attach_and_unlock_inode().  Can be called under any context.
 *
 * As concurrent write sharing of an inode is expected to be very rare and
 * memcg only tracks page ownership on first-use basis severely confining
 * the usefulness of such sharing, cgroup writeback tracks ownership
 * per-inode.  While the support for concurrent write sharing of an inode
 * is deemed unnecessary, an inode being written to by different cgroups at
 * different points in time is a lot more common, and, more importantly,
 * charging only by first-use can too readily lead to grossly incorrect
 * behaviors (a single foreign page can lead to gigabytes of writeback
 * being incorrectly attributed).
 *
 * To resolve this issue, cgroup writeback detects the majority dirtier of
 * an inode and transfers the ownership to it.  To avoid unnecessary
 * oscillation, the detection mechanism keeps track of history and gives
 * out the switch verdict only if the foreign usage pattern is stable over
 * a certain amount of time and/or writeback attempts.
 *
 * On each writeback attempt, @wbc tries to detect the majority writer
 * using the Boyer-Moore majority vote algorithm.  In addition to the byte
 * count from the majority voting, it also counts the bytes written for the
 * current wb and the last round's winner wb.  Keeping track of the
 * historical winner helps the algorithm to semi-reliably detect the most
 * active writer even when it's not the absolute majority.
 *
 * Once the winner of the round is determined, whether the winner is
 * foreign or not and how much IO time the round consumed is recorded in
 * inode->i_wb_frn_history.  If the amount of recorded foreign IO time is
 * larger than a certain amount, the switching machinery is activated.
 */
584void wbc_detach_inode(struct writeback_control *wbc)
585{
586 struct bdi_writeback *wb = wbc->wb;
587 struct inode *inode = wbc->inode;
588 unsigned long avg_time, max_bytes, max_time;
589 u16 history;
590 int max_id;
591
592 if (!wb)
593 return;
594
595 history = inode->i_wb_frn_history;
596 avg_time = inode->i_wb_frn_avg_time;
597
598
599 if (wbc->wb_bytes >= wbc->wb_lcand_bytes &&
600 wbc->wb_bytes >= wbc->wb_tcand_bytes) {
601 max_id = wbc->wb_id;
602 max_bytes = wbc->wb_bytes;
603 } else if (wbc->wb_lcand_bytes >= wbc->wb_tcand_bytes) {
604 max_id = wbc->wb_lcand_id;
605 max_bytes = wbc->wb_lcand_bytes;
606 } else {
607 max_id = wbc->wb_tcand_id;
608 max_bytes = wbc->wb_tcand_bytes;
609 }
610
611
612
613
614
615
616
617
618 max_time = DIV_ROUND_UP((max_bytes >> PAGE_SHIFT) << WB_FRN_TIME_SHIFT,
619 wb->avg_write_bandwidth);
620 if (avg_time)
621 avg_time += (max_time >> WB_FRN_TIME_AVG_SHIFT) -
622 (avg_time >> WB_FRN_TIME_AVG_SHIFT);
623 else
624 avg_time = max_time;
625
626 if (max_time >= avg_time / WB_FRN_TIME_CUT_DIV) {
627 int slots;
628
629
630
631
632
633
634
635
636
637 slots = min(DIV_ROUND_UP(max_time, WB_FRN_HIST_UNIT),
638 (unsigned long)WB_FRN_HIST_MAX_SLOTS);
639 history <<= slots;
640 if (wbc->wb_id != max_id)
641 history |= (1U << slots) - 1;
642
643
644
645
646
647
648
649
650 if (hweight32(history) > WB_FRN_HIST_THR_SLOTS)
651 inode_switch_wbs(inode, max_id);
652 }
653
654
655
656
657
658 inode->i_wb_frn_winner = max_id;
659 inode->i_wb_frn_avg_time = min(avg_time, (unsigned long)U16_MAX);
660 inode->i_wb_frn_history = history;
661
662 wb_put(wbc->wb);
663 wbc->wb = NULL;
664}

/**
 * wbc_account_io - account IO issued during writeback
 * @wbc: writeback_control of the writeback in progress
 * @page: page being written out
 * @bytes: number of bytes being written out
 *
 * @bytes from @page are about to be written out during the writeback
 * controlled by @wbc.  Keep the book to enable foreign inode detection in
 * wbc_detach_inode().
 */
676void wbc_account_io(struct writeback_control *wbc, struct page *page,
677 size_t bytes)
678{
679 int id;
680
681
682
683
684
685
686
687 if (!wbc->wb)
688 return;
689
690 id = mem_cgroup_css_from_page(page)->id;
691
692 if (id == wbc->wb_id) {
693 wbc->wb_bytes += bytes;
694 return;
695 }
696
697 if (id == wbc->wb_lcand_id)
698 wbc->wb_lcand_bytes += bytes;
699
700
701 if (!wbc->wb_tcand_bytes)
702 wbc->wb_tcand_id = id;
703 if (id == wbc->wb_tcand_id)
704 wbc->wb_tcand_bytes += bytes;
705 else
706 wbc->wb_tcand_bytes -= min(bytes, wbc->wb_tcand_bytes);
707}
708EXPORT_SYMBOL_GPL(wbc_account_io);

/**
 * inode_congested - test whether an inode is congested
 * @inode: inode to test for congestion (may be NULL)
 * @cong_bits: mask of WB_[a]sync_congested bits to test
 *
 * Tests whether @inode is congested.  @cong_bits is the mask of congestion
 * bits to test and the return value is the mask of set bits.
 *
 * If cgroup writeback is enabled for @inode, the congestion state is
 * determined by whether the cgwb (cgroup bdi_writeback) for the blkcg
 * associated with @inode is congested; otherwise, the root wb's congestion
 * state is used.
 *
 * @inode is allowed to be NULL as this function is often called on
 * mapping->host which is NULL for the swapper space.
 */
726int inode_congested(struct inode *inode, int cong_bits)
727{
728
729
730
731
732 if (inode && inode_to_wb_is_valid(inode)) {
733 struct bdi_writeback *wb;
734 bool locked, congested;
735
736 wb = unlocked_inode_to_wb_begin(inode, &locked);
737 congested = wb_congested(wb, cong_bits);
738 unlocked_inode_to_wb_end(inode, locked);
739 return congested;
740 }
741
742 return wb_congested(&inode_to_bdi(inode)->wb, cong_bits);
743}
744EXPORT_SYMBOL_GPL(inode_congested);

/**
 * wb_split_bdi_pages - split nr_pages to write according to bandwidth
 * @wb: target bdi_writeback to split @nr_pages to
 * @nr_pages: number of pages to write for the whole bdi
 *
 * Split @wb's portion of @nr_pages according to @wb's write bandwidth in
 * relation to the total write bandwidth of all wb's w/ dirty inodes on
 * @wb->bdi.
 */
755static long wb_split_bdi_pages(struct bdi_writeback *wb, long nr_pages)
756{
757 unsigned long this_bw = wb->avg_write_bandwidth;
758 unsigned long tot_bw = atomic_long_read(&wb->bdi->tot_write_bandwidth);
759
760 if (nr_pages == LONG_MAX)
761 return LONG_MAX;
762
763
764
765
766
767
768 if (!tot_bw || this_bw >= tot_bw)
769 return nr_pages;
770 else
771 return DIV_ROUND_UP_ULL((u64)nr_pages * this_bw, tot_bw);
772}

/**
 * bdi_split_work_to_wbs - split a wb_writeback_work to all wb's of a bdi
 * @bdi: target backing_dev_info
 * @base_work: wb_writeback_work to issue
 * @skip_if_busy: skip wb's which already have writeback in progress
 *
 * Split and issue @base_work to all wb's (bdi_writeback's) of @bdi which
 * have dirty inodes.  If @base_work->nr_pages isn't %LONG_MAX, it's
 * distributed to the busy wbs according to each wb's proportion in the
 * total active write bandwidth of @bdi.
 */
785static void bdi_split_work_to_wbs(struct backing_dev_info *bdi,
786 struct wb_writeback_work *base_work,
787 bool skip_if_busy)
788{
789 struct bdi_writeback *last_wb = NULL;
790 struct bdi_writeback *wb = list_entry(&bdi->wb_list,
791 struct bdi_writeback, bdi_node);
792
793 might_sleep();
794restart:
795 rcu_read_lock();
796 list_for_each_entry_continue_rcu(wb, &bdi->wb_list, bdi_node) {
797 DEFINE_WB_COMPLETION_ONSTACK(fallback_work_done);
798 struct wb_writeback_work fallback_work;
799 struct wb_writeback_work *work;
800 long nr_pages;
801
802 if (last_wb) {
803 wb_put(last_wb);
804 last_wb = NULL;
805 }
806
807
808 if (!wb_has_dirty_io(wb) &&
809 (base_work->sync_mode == WB_SYNC_NONE ||
810 list_empty(&wb->b_dirty_time)))
811 continue;
812 if (skip_if_busy && writeback_in_progress(wb))
813 continue;
814
815 nr_pages = wb_split_bdi_pages(wb, base_work->nr_pages);
816
817 work = kmalloc(sizeof(*work), GFP_ATOMIC);
818 if (work) {
819 *work = *base_work;
820 work->nr_pages = nr_pages;
821 work->auto_free = 1;
822 wb_queue_work(wb, work);
823 continue;
824 }
825
826
827 work = &fallback_work;
828 *work = *base_work;
829 work->nr_pages = nr_pages;
830 work->auto_free = 0;
831 work->done = &fallback_work_done;
832
833 wb_queue_work(wb, work);
834
835
836
837
838
839
840 wb_get(wb);
841 last_wb = wb;
842
843 rcu_read_unlock();
844 wb_wait_for_completion(bdi, &fallback_work_done);
845 goto restart;
846 }
847 rcu_read_unlock();
848
849 if (last_wb)
850 wb_put(last_wb);
851}

/**
 * cgroup_writeback_umount - flush inode wb switches for umount
 *
 * This function is called when a super_block is about to be destroyed and
 * flushes in-flight inode wb switches.  An inode wb switch goes through
 * RCU and then workqueue, so the two need to be flushed in order to ensure
 * that all previously scheduled switches are finished.  As wb switches are
 * rare occurrences and synchronize_rcu() can take a while, perform
 * flushing iff wb switches are in flight.
 */
863void cgroup_writeback_umount(void)
864{
865 if (atomic_read(&isw_nr_in_flight)) {
866 synchronize_rcu();
867 flush_workqueue(isw_wq);
868 }
869}
870
871static int __init cgroup_writeback_init(void)
872{
873 isw_wq = alloc_workqueue("inode_switch_wbs", 0, 0);
874 if (!isw_wq)
875 return -ENOMEM;
876 return 0;
877}
878fs_initcall(cgroup_writeback_init);
879
880#else
881
882static struct bdi_writeback *
883locked_inode_to_wb_and_lock_list(struct inode *inode)
884 __releases(&inode->i_lock)
885 __acquires(&wb->list_lock)
886{
887 struct bdi_writeback *wb = inode_to_wb(inode);
888
889 spin_unlock(&inode->i_lock);
890 spin_lock(&wb->list_lock);
891 return wb;
892}
893
894static struct bdi_writeback *inode_to_wb_and_lock_list(struct inode *inode)
895 __acquires(&wb->list_lock)
896{
897 struct bdi_writeback *wb = inode_to_wb(inode);
898
899 spin_lock(&wb->list_lock);
900 return wb;
901}
902
903static long wb_split_bdi_pages(struct bdi_writeback *wb, long nr_pages)
904{
905 return nr_pages;
906}
907
908static void bdi_split_work_to_wbs(struct backing_dev_info *bdi,
909 struct wb_writeback_work *base_work,
910 bool skip_if_busy)
911{
912 might_sleep();
913
914 if (!skip_if_busy || !writeback_in_progress(&bdi->wb)) {
915 base_work->auto_free = 0;
916 wb_queue_work(&bdi->wb, base_work);
917 }
918}
919
920#endif
921
922void wb_start_writeback(struct bdi_writeback *wb, long nr_pages,
923 bool range_cyclic, enum wb_reason reason)
924{
925 struct wb_writeback_work *work;
926
927 if (!wb_has_dirty_io(wb))
928 return;
929
930
931
932
933
934 work = kzalloc(sizeof(*work), GFP_ATOMIC);
935 if (!work) {
936 trace_writeback_nowork(wb);
937 wb_wakeup(wb);
938 return;
939 }
940
941 work->sync_mode = WB_SYNC_NONE;
942 work->nr_pages = nr_pages;
943 work->range_cyclic = range_cyclic;
944 work->reason = reason;
945 work->auto_free = 1;
946
947 wb_queue_work(wb, work);
948}

/**
 * wb_start_background_writeback - start background writeback
 * @wb: bdi_writback to write from
 *
 * Description:
 *   This makes sure WB_SYNC_NONE background writeback happens. When
 *   this function returns, it is only guaranteed that for given wb
 *   some IO is happening if we are over background dirty threshold.
 *   Caller need not hold sb s_umount semaphore.
 */
960void wb_start_background_writeback(struct bdi_writeback *wb)
961{
962
963
964
965
966 trace_writeback_wake_background(wb);
967 wb_wakeup(wb);
968}

/*
 * Remove the inode from the writeback lists it is on.
 */
973void inode_io_list_del(struct inode *inode)
974{
975 struct bdi_writeback *wb;
976
977 wb = inode_to_wb_and_lock_list(inode);
978 inode_io_list_del_locked(inode, wb);
979 spin_unlock(&wb->list_lock);
980}

/*
 * redirty_tail():	the inode is getting redirtied, move it to the tail
 *			of @wb->b_dirty so it is retried later.
 *
 * Don't let dirtied_when jump backwards relative to the current tail of
 * b_dirty, as that would break the time ordering of the list; only bump
 * it to jiffies when it would otherwise appear older than the tail.
 */
991static void redirty_tail(struct inode *inode, struct bdi_writeback *wb)
992{
993 if (!list_empty(&wb->b_dirty)) {
994 struct inode *tail;
995
996 tail = wb_inode(wb->b_dirty.next);
997 if (time_before(inode->dirtied_when, tail->dirtied_when))
998 inode->dirtied_when = jiffies;
999 }
1000 inode_io_list_move_locked(inode, wb, &wb->b_dirty);
1001}

/*
 * requeue inode for re-scanning after bdi->b_io list is exhausted.
 */
1006static void requeue_io(struct inode *inode, struct bdi_writeback *wb)
1007{
1008 inode_io_list_move_locked(inode, wb, &wb->b_more_io);
1009}
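
/*
 * Writeback on the inode is done: clear I_SYNC, put the inode back on the
 * LRU if appropriate and wake up anybody waiting on __I_SYNC.  The memory
 * barrier makes the cleared I_SYNC visible before the waitqueue is checked
 * by wake_up_bit().
 */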
1011static void inode_sync_complete(struct inode *inode)
1012{
1013 inode->i_state &= ~I_SYNC;
1014
1015 inode_add_lru(inode);
1016
1017 smp_mb();
1018 wake_up_bit(&inode->i_state, __I_SYNC);
1019}
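
/*
 * Test whether @inode was dirtied after @t.  On 32-bit systems a wrapped
 * dirtied_when can appear to be in the future, so it is additionally
 * checked against jiffies to keep such inodes from stalling writeback.
 */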
1021static bool inode_dirtied_after(struct inode *inode, unsigned long t)
1022{
1023 bool ret = time_after(inode->dirtied_when, t);
1024#ifndef CONFIG_64BIT
1025
1026
1027
1028
1029
1030
1031 ret = ret && time_before_eq(inode->dirtied_when, jiffies);
1032#endif
1033 return ret;
1034}
1035
1036#define EXPIRE_DIRTY_ATIME 0x0001

/*
 * Move expired (dirtied before work->older_than_this) dirty inodes from
 * @delaying_queue to @dispatch_queue.
 */
1042static int move_expired_inodes(struct list_head *delaying_queue,
1043 struct list_head *dispatch_queue,
1044 int flags,
1045 struct wb_writeback_work *work)
1046{
1047 unsigned long *older_than_this = NULL;
1048 unsigned long expire_time;
1049 LIST_HEAD(tmp);
1050 struct list_head *pos, *node;
1051 struct super_block *sb = NULL;
1052 struct inode *inode;
1053 int do_sb_sort = 0;
1054 int moved = 0;
1055
1056 if ((flags & EXPIRE_DIRTY_ATIME) == 0)
1057 older_than_this = work->older_than_this;
1058 else if (!work->for_sync) {
1059 expire_time = jiffies - (dirtytime_expire_interval * HZ);
1060 older_than_this = &expire_time;
1061 }
1062 while (!list_empty(delaying_queue)) {
1063 inode = wb_inode(delaying_queue->prev);
1064 if (older_than_this &&
1065 inode_dirtied_after(inode, *older_than_this))
1066 break;
1067 list_move(&inode->i_io_list, &tmp);
1068 moved++;
1069 if (flags & EXPIRE_DIRTY_ATIME)
1070 set_bit(__I_DIRTY_TIME_EXPIRED, &inode->i_state);
1071 if (sb_is_blkdev_sb(inode->i_sb))
1072 continue;
1073 if (sb && sb != inode->i_sb)
1074 do_sb_sort = 1;
1075 sb = inode->i_sb;
1076 }
1077
1078
1079 if (!do_sb_sort) {
1080 list_splice(&tmp, dispatch_queue);
1081 goto out;
1082 }
1083
1084
1085 while (!list_empty(&tmp)) {
1086 sb = wb_inode(tmp.prev)->i_sb;
1087 list_for_each_prev_safe(pos, node, &tmp) {
1088 inode = wb_inode(pos);
1089 if (inode->i_sb == sb)
1090 list_move(&inode->i_io_list, dispatch_queue);
1091 }
1092 }
1093out:
1094 return moved;
1095}

/*
 * Queue all expired dirty inodes for io, eldest first.
 * Before
 *         newly dirtied     b_dirty    b_io    b_more_io
 *         =============>    gf         edc     BA
 * After
 *         newly dirtied     b_dirty    b_io    b_more_io
 *         =============>    g          fBAedc
 *                                           |
 *                                           +--> dequeue for IO
 */
1108static void queue_io(struct bdi_writeback *wb, struct wb_writeback_work *work)
1109{
1110 int moved;
1111
1112 assert_spin_locked(&wb->list_lock);
1113 list_splice_init(&wb->b_more_io, &wb->b_io);
1114 moved = move_expired_inodes(&wb->b_dirty, &wb->b_io, 0, work);
1115 moved += move_expired_inodes(&wb->b_dirty_time, &wb->b_io,
1116 EXPIRE_DIRTY_ATIME, work);
1117 if (moved)
1118 wb_io_lists_populated(wb);
1119 trace_writeback_queue_io(wb, work, moved);
1120}
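
/*
 * Write out the inode's on-disk metadata via the filesystem's
 * ->write_inode() method, if one exists and the inode isn't bad.
 */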
1122static int write_inode(struct inode *inode, struct writeback_control *wbc)
1123{
1124 int ret;
1125
1126 if (inode->i_sb->s_op->write_inode && !is_bad_inode(inode)) {
1127 trace_writeback_write_inode_start(inode, wbc);
1128 ret = inode->i_sb->s_op->write_inode(inode, wbc);
1129 trace_writeback_write_inode(inode, wbc);
1130 return ret;
1131 }
1132 return 0;
1133}

/*
 * Wait for writeback on an inode to complete. Called with i_lock held.
 * Caller must make sure inode cannot go away when we drop i_lock.
 */
1139static void __inode_wait_for_writeback(struct inode *inode)
1140 __releases(inode->i_lock)
1141 __acquires(inode->i_lock)
1142{
1143 DEFINE_WAIT_BIT(wq, &inode->i_state, __I_SYNC);
1144 wait_queue_head_t *wqh;
1145
1146 wqh = bit_waitqueue(&inode->i_state, __I_SYNC);
1147 while (inode->i_state & I_SYNC) {
1148 spin_unlock(&inode->i_lock);
1149 __wait_on_bit(wqh, &wq, bit_wait,
1150 TASK_UNINTERRUPTIBLE);
1151 spin_lock(&inode->i_lock);
1152 }
1153}

/*
 * Wait for writeback on an inode to complete. Caller must have inode pinned.
 */
1158void inode_wait_for_writeback(struct inode *inode)
1159{
1160 spin_lock(&inode->i_lock);
1161 __inode_wait_for_writeback(inode);
1162 spin_unlock(&inode->i_lock);
1163}

/*
 * Sleep until I_SYNC is cleared. This function must be called with i_lock
 * held and drops it. It is aimed for callers not holding any inode reference
 * so once i_lock is dropped, inode can go away.
 */
1170static void inode_sleep_on_writeback(struct inode *inode)
1171 __releases(inode->i_lock)
1172{
1173 DEFINE_WAIT(wait);
1174 wait_queue_head_t *wqh = bit_waitqueue(&inode->i_state, __I_SYNC);
1175 int sleep;
1176
1177 prepare_to_wait(wqh, &wait, TASK_UNINTERRUPTIBLE);
1178 sleep = inode->i_state & I_SYNC;
1179 spin_unlock(&inode->i_lock);
1180 if (sleep)
1181 schedule();
1182 finish_wait(wqh, &wait);
1183}

/*
 * Find proper writeback list for the inode depending on its current state and
 * possibly also change of its state while we were doing writeback.  Here we
 * handle things such as livelock prevention or fairness of writeback among
 * inodes. This function can be called only by the flusher thread - no one else
 * processes all inodes in writeback lists and requeueing inodes behind the
 * flusher thread's back can have unexpected consequences.
 */
1193static void requeue_inode(struct inode *inode, struct bdi_writeback *wb,
1194 struct writeback_control *wbc)
1195{
1196 if (inode->i_state & I_FREEING)
1197 return;
1198
1199
1200
1201
1202
1203
1204 if ((inode->i_state & I_DIRTY) &&
1205 (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages))
1206 inode->dirtied_when = jiffies;
1207
1208 if (wbc->pages_skipped) {
1209
1210
1211
1212
1213 redirty_tail(inode, wb);
1214 return;
1215 }
1216
1217 if (mapping_tagged(inode->i_mapping, PAGECACHE_TAG_DIRTY)) {
1218
1219
1220
1221
1222 if (wbc->nr_to_write <= 0) {
1223
1224 requeue_io(inode, wb);
1225 } else {
1226
1227
1228
1229
1230
1231
1232
1233 redirty_tail(inode, wb);
1234 }
1235 } else if (inode->i_state & I_DIRTY) {
1236
1237
1238
1239
1240
1241 redirty_tail(inode, wb);
1242 } else if (inode->i_state & I_DIRTY_TIME) {
1243 inode->dirtied_when = jiffies;
1244 inode_io_list_move_locked(inode, wb, &wb->b_dirty_time);
1245 } else {
1246
1247 inode_io_list_del_locked(inode, wb);
1248 }
1249}

/*
 * Write out an inode and its dirty pages. Do not update the writeback list
 * linkage. That is left to the caller. The caller is also responsible for
 * setting the I_SYNC flag and calling inode_sync_complete() to clear it.
 */
1256static int
1257__writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
1258{
1259 struct address_space *mapping = inode->i_mapping;
1260 long nr_to_write = wbc->nr_to_write;
1261 unsigned dirty;
1262 int ret;
1263
1264 WARN_ON(!(inode->i_state & I_SYNC));
1265
1266 trace_writeback_single_inode_start(inode, wbc, nr_to_write);
1267
1268 ret = do_writepages(mapping, wbc);
1269
1270
1271
1272
1273
1274
1275
1276
1277 if (wbc->sync_mode == WB_SYNC_ALL && !wbc->for_sync) {
1278 int err = filemap_fdatawait(mapping);
1279 if (ret == 0)
1280 ret = err;
1281 }
1282
1283
1284
1285
1286
1287
1288 spin_lock(&inode->i_lock);
1289
1290 dirty = inode->i_state & I_DIRTY;
1291 if (inode->i_state & I_DIRTY_TIME) {
1292 if ((dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) ||
1293 unlikely(inode->i_state & I_DIRTY_TIME_EXPIRED) ||
1294 unlikely(time_after(jiffies,
1295 (inode->dirtied_time_when +
1296 dirtytime_expire_interval * HZ)))) {
1297 dirty |= I_DIRTY_TIME | I_DIRTY_TIME_EXPIRED;
1298 trace_writeback_lazytime(inode);
1299 }
1300 } else
1301 inode->i_state &= ~I_DIRTY_TIME_EXPIRED;
1302 inode->i_state &= ~dirty;
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315 smp_mb();
1316
1317 if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY))
1318 inode->i_state |= I_DIRTY_PAGES;
1319
1320 spin_unlock(&inode->i_lock);
1321
1322 if (dirty & I_DIRTY_TIME)
1323 mark_inode_dirty_sync(inode);
1324
1325 if (dirty & ~I_DIRTY_PAGES) {
1326 int err = write_inode(inode, wbc);
1327 if (ret == 0)
1328 ret = err;
1329 }
1330 trace_writeback_single_inode(inode, wbc, nr_to_write);
1331 return ret;
1332}

/*
 * Write out an inode's dirty pages. Either the caller has an active reference
 * on the inode or the inode has I_WILL_FREE set.
 *
 * This function is designed to be called for writing back one inode which
 * we go e.g. from filesystem. The flusher thread uses __writeback_single_inode()
 * and does more profound writeback list handling in writeback_sb_inodes().
 */
1342static int writeback_single_inode(struct inode *inode,
1343 struct writeback_control *wbc)
1344{
1345 struct bdi_writeback *wb;
1346 int ret = 0;
1347
1348 spin_lock(&inode->i_lock);
1349 if (!atomic_read(&inode->i_count))
1350 WARN_ON(!(inode->i_state & (I_WILL_FREE|I_FREEING)));
1351 else
1352 WARN_ON(inode->i_state & I_WILL_FREE);
1353
1354 if (inode->i_state & I_SYNC) {
1355 if (wbc->sync_mode != WB_SYNC_ALL)
1356 goto out;
1357
1358
1359
1360
1361
1362 __inode_wait_for_writeback(inode);
1363 }
1364 WARN_ON(inode->i_state & I_SYNC);
1365
1366
1367
1368
1369
1370
1371
1372
1373 if (!(inode->i_state & I_DIRTY_ALL) &&
1374 (wbc->sync_mode != WB_SYNC_ALL ||
1375 !mapping_tagged(inode->i_mapping, PAGECACHE_TAG_WRITEBACK)))
1376 goto out;
1377 inode->i_state |= I_SYNC;
1378 wbc_attach_and_unlock_inode(wbc, inode);
1379
1380 ret = __writeback_single_inode(inode, wbc);
1381
1382 wbc_detach_inode(wbc);
1383
1384 wb = inode_to_wb_and_lock_list(inode);
1385 spin_lock(&inode->i_lock);
1386
1387
1388
1389
1390 if (!(inode->i_state & I_DIRTY_ALL))
1391 inode_io_list_del_locked(inode, wb);
1392 spin_unlock(&wb->list_lock);
1393 inode_sync_complete(inode);
1394out:
1395 spin_unlock(&inode->i_lock);
1396 return ret;
1397}
1398
1399static long writeback_chunk_size(struct bdi_writeback *wb,
1400 struct wb_writeback_work *work)
1401{
1402 long pages;
1403
1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417 if (work->sync_mode == WB_SYNC_ALL || work->tagged_writepages)
1418 pages = LONG_MAX;
1419 else {
1420 pages = min(wb->avg_write_bandwidth / 2,
1421 global_wb_domain.dirty_limit / DIRTY_SCOPE);
1422 pages = min(pages, work->nr_pages);
1423 pages = round_down(pages + MIN_WRITEBACK_PAGES,
1424 MIN_WRITEBACK_PAGES);
1425 }
1426
1427 return pages;
1428}

/*
 * Write a portion of b_io inodes which belong to @sb.
 *
 * Return the number of pages and/or inodes written.
 *
 * NOTE! This is called with wb->list_lock held, and will
 * unlock and relock that for each inode it ends up doing
 * IO for.
 */
1439static long writeback_sb_inodes(struct super_block *sb,
1440 struct bdi_writeback *wb,
1441 struct wb_writeback_work *work)
1442{
1443 struct writeback_control wbc = {
1444 .sync_mode = work->sync_mode,
1445 .tagged_writepages = work->tagged_writepages,
1446 .for_kupdate = work->for_kupdate,
1447 .for_background = work->for_background,
1448 .for_sync = work->for_sync,
1449 .range_cyclic = work->range_cyclic,
1450 .range_start = 0,
1451 .range_end = LLONG_MAX,
1452 };
1453 unsigned long start_time = jiffies;
1454 long write_chunk;
1455 long wrote = 0;
1456
1457 while (!list_empty(&wb->b_io)) {
1458 struct inode *inode = wb_inode(wb->b_io.prev);
1459 struct bdi_writeback *tmp_wb;
1460
1461 if (inode->i_sb != sb) {
1462 if (work->sb) {
1463
1464
1465
1466
1467
1468 redirty_tail(inode, wb);
1469 continue;
1470 }
1471
1472
1473
1474
1475
1476
1477 break;
1478 }
1479
1480
1481
1482
1483
1484
1485 spin_lock(&inode->i_lock);
1486 if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) {
1487 spin_unlock(&inode->i_lock);
1488 redirty_tail(inode, wb);
1489 continue;
1490 }
1491 if ((inode->i_state & I_SYNC) && wbc.sync_mode != WB_SYNC_ALL) {
1492
1493
1494
1495
1496
1497
1498
1499
1500
1501 spin_unlock(&inode->i_lock);
1502 requeue_io(inode, wb);
1503 trace_writeback_sb_inodes_requeue(inode);
1504 continue;
1505 }
1506 spin_unlock(&wb->list_lock);
1507
1508
1509
1510
1511
1512
1513 if (inode->i_state & I_SYNC) {
1514
1515 inode_sleep_on_writeback(inode);
1516
1517 spin_lock(&wb->list_lock);
1518 continue;
1519 }
1520 inode->i_state |= I_SYNC;
1521 wbc_attach_and_unlock_inode(&wbc, inode);
1522
1523 write_chunk = writeback_chunk_size(wb, work);
1524 wbc.nr_to_write = write_chunk;
1525 wbc.pages_skipped = 0;
1526
1527
1528
1529
1530
1531 __writeback_single_inode(inode, &wbc);
1532
1533 wbc_detach_inode(&wbc);
1534 work->nr_pages -= write_chunk - wbc.nr_to_write;
1535 wrote += write_chunk - wbc.nr_to_write;
1536
1537 if (need_resched()) {
1538
1539
1540
1541
1542
1543
1544
1545
1546 blk_flush_plug(current);
1547 cond_resched();
1548 }
1549
1550
1551
1552
1553
1554 tmp_wb = inode_to_wb_and_lock_list(inode);
1555 spin_lock(&inode->i_lock);
1556 if (!(inode->i_state & I_DIRTY_ALL))
1557 wrote++;
1558 requeue_inode(inode, tmp_wb, &wbc);
1559 inode_sync_complete(inode);
1560 spin_unlock(&inode->i_lock);
1561
1562 if (unlikely(tmp_wb != wb)) {
1563 spin_unlock(&tmp_wb->list_lock);
1564 spin_lock(&wb->list_lock);
1565 }
1566
1567
1568
1569
1570
1571 if (wrote) {
1572 if (time_is_before_jiffies(start_time + HZ / 10UL))
1573 break;
1574 if (work->nr_pages <= 0)
1575 break;
1576 }
1577 }
1578 return wrote;
1579}
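
/*
 * Write a portion of b_io inodes regardless of which superblock they
 * belong to.  Each superblock is pinned with trylock_super(); inodes whose
 * superblock can't be pinned are redirtied and retried later.
 */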
1581static long __writeback_inodes_wb(struct bdi_writeback *wb,
1582 struct wb_writeback_work *work)
1583{
1584 unsigned long start_time = jiffies;
1585 long wrote = 0;
1586
1587 while (!list_empty(&wb->b_io)) {
1588 struct inode *inode = wb_inode(wb->b_io.prev);
1589 struct super_block *sb = inode->i_sb;
1590
1591 if (!trylock_super(sb)) {
1592
1593
1594
1595
1596
1597 redirty_tail(inode, wb);
1598 continue;
1599 }
1600 wrote += writeback_sb_inodes(sb, wb, work);
1601 up_read(&sb->s_umount);
1602
1603
1604 if (wrote) {
1605 if (time_is_before_jiffies(start_time + HZ / 10UL))
1606 break;
1607 if (work->nr_pages <= 0)
1608 break;
1609 }
1610 }
1611
1612 return wrote;
1613}
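
/*
 * Issue a WB_SYNC_NONE writeback of up to @nr_pages pages from this wb's
 * IO lists and return the number of pages actually written.
 */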
1615static long writeback_inodes_wb(struct bdi_writeback *wb, long nr_pages,
1616 enum wb_reason reason)
1617{
1618 struct wb_writeback_work work = {
1619 .nr_pages = nr_pages,
1620 .sync_mode = WB_SYNC_NONE,
1621 .range_cyclic = 1,
1622 .reason = reason,
1623 };
1624 struct blk_plug plug;
1625
1626 blk_start_plug(&plug);
1627 spin_lock(&wb->list_lock);
1628 if (list_empty(&wb->b_io))
1629 queue_io(wb, &work);
1630 __writeback_inodes_wb(wb, &work);
1631 spin_unlock(&wb->list_lock);
1632 blk_finish_plug(&plug);
1633
1634 return nr_pages - work.nr_pages;
1635}

/*
 * Explicit flushing or periodic writeback of "old" data.
 *
 * Define "old": the first time one of an inode's pages is dirtied, we mark the
 * dirtying-time in the inode's address_space.  So this periodic writeback code
 * just walks the superblock inode list, writing back any inodes which are
 * older than a specific point in time.
 *
 * Try to run once per dirty_writeback_interval.  But if a writeback event
 * takes longer than a dirty_writeback_interval interval, then leave a
 * one-second gap.
 *
 * older_than_this takes precedence over nr_to_write.  So we'll only write back
 * all dirty pages if they are all attached to "old" mappings.
 */
1652static long wb_writeback(struct bdi_writeback *wb,
1653 struct wb_writeback_work *work)
1654{
1655 unsigned long wb_start = jiffies;
1656 long nr_pages = work->nr_pages;
1657 unsigned long oldest_jif;
1658 struct inode *inode;
1659 long progress;
1660 struct blk_plug plug;
1661
1662 oldest_jif = jiffies;
1663 work->older_than_this = &oldest_jif;
1664
1665 blk_start_plug(&plug);
1666 spin_lock(&wb->list_lock);
1667 for (;;) {
1668
1669
1670
1671 if (work->nr_pages <= 0)
1672 break;
1673
1674
1675
1676
1677
1678
1679
1680 if ((work->for_background || work->for_kupdate) &&
1681 !list_empty(&wb->work_list))
1682 break;
1683
1684
1685
1686
1687
1688 if (work->for_background && !wb_over_bg_thresh(wb))
1689 break;
1690
1691
1692
1693
1694
1695
1696
1697 if (work->for_kupdate) {
1698 oldest_jif = jiffies -
1699 msecs_to_jiffies(dirty_expire_interval * 10);
1700 } else if (work->for_background)
1701 oldest_jif = jiffies;
1702
1703 trace_writeback_start(wb, work);
1704 if (list_empty(&wb->b_io))
1705 queue_io(wb, work);
1706 if (work->sb)
1707 progress = writeback_sb_inodes(work->sb, wb, work);
1708 else
1709 progress = __writeback_inodes_wb(wb, work);
1710 trace_writeback_written(wb, work);
1711
1712 wb_update_bandwidth(wb, wb_start);
1713
1714
1715
1716
1717
1718
1719
1720
1721
1722 if (progress)
1723 continue;
1724
1725
1726
1727 if (list_empty(&wb->b_more_io))
1728 break;
1729
1730
1731
1732
1733
1734 if (!list_empty(&wb->b_more_io)) {
1735 trace_writeback_wait(wb, work);
1736 inode = wb_inode(wb->b_more_io.prev);
1737 spin_lock(&inode->i_lock);
1738 spin_unlock(&wb->list_lock);
1739
1740 inode_sleep_on_writeback(inode);
1741 spin_lock(&wb->list_lock);
1742 }
1743 }
1744 spin_unlock(&wb->list_lock);
1745 blk_finish_plug(&plug);
1746
1747 return nr_pages - work->nr_pages;
1748}

/*
 * Return the next wb_writeback_work struct that hasn't been processed yet.
 */
1753static struct wb_writeback_work *get_next_work_item(struct bdi_writeback *wb)
1754{
1755 struct wb_writeback_work *work = NULL;
1756
1757 spin_lock_bh(&wb->work_lock);
1758 if (!list_empty(&wb->work_list)) {
1759 work = list_entry(wb->work_list.next,
1760 struct wb_writeback_work, list);
1761 list_del_init(&work->list);
1762 }
1763 spin_unlock_bh(&wb->work_lock);
1764 return work;
1765}

/*
 * Add in the number of potentially dirty inodes, because each inode
 * write can dirty pagecache in the underlying blockdev.
 */
1771static unsigned long get_nr_dirty_pages(void)
1772{
1773 return global_page_state(NR_FILE_DIRTY) +
1774 global_page_state(NR_UNSTABLE_NFS) +
1775 get_nr_dirty_inodes();
1776}
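
/*
 * Start background writeback if @wb is over its background dirty
 * threshold; returns the number of pages written, 0 otherwise.
 */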
1778static long wb_check_background_flush(struct bdi_writeback *wb)
1779{
1780 if (wb_over_bg_thresh(wb)) {
1781
1782 struct wb_writeback_work work = {
1783 .nr_pages = LONG_MAX,
1784 .sync_mode = WB_SYNC_NONE,
1785 .for_background = 1,
1786 .range_cyclic = 1,
1787 .reason = WB_REASON_BACKGROUND,
1788 };
1789
1790 return wb_writeback(wb, &work);
1791 }
1792
1793 return 0;
1794}
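
/*
 * Periodic kupdate-style flush of old dirty data, run at most once per
 * dirty_writeback_interval.  Returns the number of pages written.
 */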
1796static long wb_check_old_data_flush(struct bdi_writeback *wb)
1797{
1798 unsigned long expired;
1799 long nr_pages;
1800
1801
1802
1803
1804 if (!dirty_writeback_interval)
1805 return 0;
1806
1807 expired = wb->last_old_flush +
1808 msecs_to_jiffies(dirty_writeback_interval * 10);
1809 if (time_before(jiffies, expired))
1810 return 0;
1811
1812 wb->last_old_flush = jiffies;
1813 nr_pages = get_nr_dirty_pages();
1814
1815 if (nr_pages) {
1816 struct wb_writeback_work work = {
1817 .nr_pages = nr_pages,
1818 .sync_mode = WB_SYNC_NONE,
1819 .for_kupdate = 1,
1820 .range_cyclic = 1,
1821 .reason = WB_REASON_PERIODIC,
1822 };
1823
1824 return wb_writeback(wb, &work);
1825 }
1826
1827 return 0;
1828}
1829
1830
1831
1832
1833static long wb_do_writeback(struct bdi_writeback *wb)
1834{
1835 struct wb_writeback_work *work;
1836 long wrote = 0;
1837
1838 set_bit(WB_writeback_running, &wb->state);
1839 while ((work = get_next_work_item(wb)) != NULL) {
1840 struct wb_completion *done = work->done;
1841
1842 trace_writeback_exec(wb, work);
1843
1844 wrote += wb_writeback(wb, work);
1845
1846 if (work->auto_free)
1847 kfree(work);
1848 if (done && atomic_dec_and_test(&done->cnt))
1849 wake_up_all(&wb->bdi->wb_waitq);
1850 }
1851
1852
1853
1854
1855 wrote += wb_check_old_data_flush(wb);
1856 wrote += wb_check_background_flush(wb);
1857 clear_bit(WB_writeback_running, &wb->state);
1858
1859 return wrote;
1860}

/*
 * Handle writeback of dirty data for the device backed by this bdi. Also
 * reschedules periodically and does kupdated style flushing.
 */
1866void wb_workfn(struct work_struct *work)
1867{
1868 struct bdi_writeback *wb = container_of(to_delayed_work(work),
1869 struct bdi_writeback, dwork);
1870 long pages_written;
1871
1872 set_worker_desc("flush-%s", dev_name(wb->bdi->dev));
1873 current->flags |= PF_SWAPWRITE;
1874
1875 if (likely(!current_is_workqueue_rescuer() ||
1876 !test_bit(WB_registered, &wb->state))) {
1877
1878
1879
1880
1881
1882
1883 do {
1884 pages_written = wb_do_writeback(wb);
1885 trace_writeback_pages_written(pages_written);
1886 } while (!list_empty(&wb->work_list));
1887 } else {
1888
1889
1890
1891
1892
1893 pages_written = writeback_inodes_wb(wb, 1024,
1894 WB_REASON_FORKER_THREAD);
1895 trace_writeback_pages_written(pages_written);
1896 }
1897
1898 if (!list_empty(&wb->work_list))
1899 mod_delayed_work(bdi_wq, &wb->dwork, 0);
1900 else if (wb_has_dirty_io(wb) && dirty_writeback_interval)
1901 wb_wakeup_delayed(wb);
1902
1903 current->flags &= ~PF_SWAPWRITE;
1904}

/*
 * Wakeup the flusher threads to start writeback of all currently dirty pages
 */
1910void wakeup_flusher_threads(long nr_pages, enum wb_reason reason)
1911{
1912 struct backing_dev_info *bdi;
1913
1914 if (!nr_pages)
1915 nr_pages = get_nr_dirty_pages();
1916
1917 rcu_read_lock();
1918 list_for_each_entry_rcu(bdi, &bdi_list, bdi_list) {
1919 struct bdi_writeback *wb;
1920
1921 if (!bdi_has_dirty_io(bdi))
1922 continue;
1923
1924 list_for_each_entry_rcu(wb, &bdi->wb_list, bdi_node)
1925 wb_start_writeback(wb, wb_split_bdi_pages(wb, nr_pages),
1926 false, reason);
1927 }
1928 rcu_read_unlock();
1929}

/*
 * Wake up bdi's periodically to make sure dirtytime inodes get
 * written back periodically.  We deliberately do *not* check the
 * b_dirtytime list in wb_has_dirty_io(), since this would cause the
 * kernel to be constantly waking up once there are any dirtytime
 * inodes on the system.  So instead we define a separate delayed work
 * function which gets called much more rarely.  (By default, only
 * once every 12 hours.)
 *
 * If there is any other write activity going on in the file system,
 * this function won't be necessary.  But if the only thing that has
 * dirtytime inodes is the updating of atimes on lazytime mounts,
 * this infrastructure is needed to make sure those timestamps
 * eventually get written back.
 */
1946static void wakeup_dirtytime_writeback(struct work_struct *w);
1947static DECLARE_DELAYED_WORK(dirtytime_work, wakeup_dirtytime_writeback);
1948
1949static void wakeup_dirtytime_writeback(struct work_struct *w)
1950{
1951 struct backing_dev_info *bdi;
1952
1953 rcu_read_lock();
1954 list_for_each_entry_rcu(bdi, &bdi_list, bdi_list) {
1955 struct bdi_writeback *wb;
1956
1957 list_for_each_entry_rcu(wb, &bdi->wb_list, bdi_node)
1958 if (!list_empty(&wb->b_dirty_time))
1959 wb_wakeup(wb);
1960 }
1961 rcu_read_unlock();
1962 schedule_delayed_work(&dirtytime_work, dirtytime_expire_interval * HZ);
1963}
1964
1965static int __init start_dirtytime_writeback(void)
1966{
1967 schedule_delayed_work(&dirtytime_work, dirtytime_expire_interval * HZ);
1968 return 0;
1969}
1970__initcall(start_dirtytime_writeback);
1971
1972int dirtytime_interval_handler(struct ctl_table *table, int write,
1973 void __user *buffer, size_t *lenp, loff_t *ppos)
1974{
1975 int ret;
1976
1977 ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
1978 if (ret == 0 && write)
1979 mod_delayed_work(system_wq, &dirtytime_work, 0);
1980 return ret;
1981}
1982
1983static noinline void block_dump___mark_inode_dirty(struct inode *inode)
1984{
1985 if (inode->i_ino || strcmp(inode->i_sb->s_id, "bdev")) {
1986 struct dentry *dentry;
1987 const char *name = "?";
1988
1989 dentry = d_find_alias(inode);
1990 if (dentry) {
1991 spin_lock(&dentry->d_lock);
1992 name = (const char *) dentry->d_name.name;
1993 }
1994 printk(KERN_DEBUG
1995 "%s(%d): dirtied inode %lu (%s) on %s\n",
1996 current->comm, task_pid_nr(current), inode->i_ino,
1997 name, inode->i_sb->s_id);
1998 if (dentry) {
1999 spin_unlock(&dentry->d_lock);
2000 dput(dentry);
2001 }
2002 }
2003}

/**
 * __mark_inode_dirty -	internal function
 * @inode: inode to mark
 * @flags: what kind of dirty (i.e. I_DIRTY_SYNC)
 * Mark an inode as dirty. Callers should use mark_inode_dirty or
 * mark_inode_dirty_sync.
 *
 * Put the inode on the super block's dirty list.
 *
 * CAREFUL! We mark it dirty unconditionally, but move it onto the
 * dirty list only if it is hashed or if it refers to a blockdev.
 * If it was not hashed, it will never be added to the dirty list
 * even if it is later hashed, as it will have been marked dirty already.
 *
 * In short, make sure you hash any inodes _before_ you start marking
 * them dirty.
 *
 * Note that for blockdevs, inode->dirtied_when represents the dirtying time
 * of the block-special inode (/dev/hda1) itself.  And the ->dirtied_when
 * field of the kernel-internal blockdev inode represents the dirtying time
 * of the blockdev's pages.  This is why for I_DIRTY_PAGES we always use
 * page->mapping->host, so the page-dirtying time is recorded in the internal
 * blockdev inode.
 */
2029void __mark_inode_dirty(struct inode *inode, int flags)
2030{
2031#define I_DIRTY_INODE (I_DIRTY_SYNC | I_DIRTY_DATASYNC)
2032 struct super_block *sb = inode->i_sb;
2033 int dirtytime;
2034
2035 trace_writeback_mark_inode_dirty(inode, flags);
2036
2037
2038
2039
2040
2041 if (flags & (I_DIRTY_SYNC | I_DIRTY_DATASYNC | I_DIRTY_TIME)) {
2042 trace_writeback_dirty_inode_start(inode, flags);
2043
2044 if (sb->s_op->dirty_inode)
2045 sb->s_op->dirty_inode(inode, flags);
2046
2047 trace_writeback_dirty_inode(inode, flags);
2048 }
2049 if (flags & I_DIRTY_INODE)
2050 flags &= ~I_DIRTY_TIME;
2051 dirtytime = flags & I_DIRTY_TIME;
2052
2053
2054
2055
2056
2057 smp_mb();
2058
2059 if (((inode->i_state & flags) == flags) ||
2060 (dirtytime && (inode->i_state & I_DIRTY_INODE)))
2061 return;
2062
2063 if (unlikely(block_dump))
2064 block_dump___mark_inode_dirty(inode);
2065
2066 spin_lock(&inode->i_lock);
2067 if (dirtytime && (inode->i_state & I_DIRTY_INODE))
2068 goto out_unlock_inode;
2069 if ((inode->i_state & flags) != flags) {
2070 const int was_dirty = inode->i_state & I_DIRTY;
2071
2072 inode_attach_wb(inode, NULL);
2073
2074 if (flags & I_DIRTY_INODE)
2075 inode->i_state &= ~I_DIRTY_TIME;
2076 inode->i_state |= flags;
2077
2078
2079
2080
2081
2082
2083 if (inode->i_state & I_SYNC)
2084 goto out_unlock_inode;
2085
2086
2087
2088
2089
2090 if (!S_ISBLK(inode->i_mode)) {
2091 if (inode_unhashed(inode))
2092 goto out_unlock_inode;
2093 }
2094 if (inode->i_state & I_FREEING)
2095 goto out_unlock_inode;
2096
2097
2098
2099
2100
2101 if (!was_dirty) {
2102 struct bdi_writeback *wb;
2103 struct list_head *dirty_list;
2104 bool wakeup_bdi = false;
2105
2106 wb = locked_inode_to_wb_and_lock_list(inode);
2107
2108 WARN(bdi_cap_writeback_dirty(wb->bdi) &&
2109 !test_bit(WB_registered, &wb->state),
2110 "bdi-%s not registered\n", wb->bdi->name);
2111
2112 inode->dirtied_when = jiffies;
2113 if (dirtytime)
2114 inode->dirtied_time_when = jiffies;
2115
2116 if (inode->i_state & (I_DIRTY_INODE | I_DIRTY_PAGES))
2117 dirty_list = &wb->b_dirty;
2118 else
2119 dirty_list = &wb->b_dirty_time;
2120
2121 wakeup_bdi = inode_io_list_move_locked(inode, wb,
2122 dirty_list);
2123
2124 spin_unlock(&wb->list_lock);
2125 trace_writeback_dirty_inode_enqueue(inode);
2126
2127
2128
2129
2130
2131
2132
2133 if (bdi_cap_writeback_dirty(wb->bdi) && wakeup_bdi)
2134 wb_wakeup_delayed(wb);
2135 return;
2136 }
2137 }
2138out_unlock_inode:
2139 spin_unlock(&inode->i_lock);
2140
2141#undef I_DIRTY_INODE
2142}
2143EXPORT_SYMBOL(__mark_inode_dirty);

/*
 * The @s_sync_lock is used to serialise concurrent sync operations
 * to avoid lock contention problems with concurrent wait_sb_inodes() calls.
 * Concurrent callers will block on the s_sync_lock rather than doing
 * contending walks. The queueing maintains sync(2) required behaviour as all
 * the IO that has been issued up to the time this sync is started is for
 * completion by the time this sync returns.
 */
2154static void wait_sb_inodes(struct super_block *sb)
2155{
2156 struct inode *inode, *old_inode = NULL;
2157
2158
2159
2160
2161
2162 WARN_ON(!rwsem_is_locked(&sb->s_umount));
2163
2164 mutex_lock(&sb->s_sync_lock);
2165 spin_lock(&sb->s_inode_list_lock);
2166
2167
2168
2169
2170
2171
2172
2173
2174 list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
2175 struct address_space *mapping = inode->i_mapping;
2176
2177 spin_lock(&inode->i_lock);
2178 if ((inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) ||
2179 (mapping->nrpages == 0)) {
2180 spin_unlock(&inode->i_lock);
2181 continue;
2182 }
2183 __iget(inode);
2184 spin_unlock(&inode->i_lock);
2185 spin_unlock(&sb->s_inode_list_lock);
2186
2187
2188
2189
2190
2191
2192
2193
2194
2195 iput(old_inode);
2196 old_inode = inode;
2197
2198
2199
2200
2201
2202
2203 filemap_fdatawait_keep_errors(mapping);
2204
2205 cond_resched();
2206
2207 spin_lock(&sb->s_inode_list_lock);
2208 }
2209 spin_unlock(&sb->s_inode_list_lock);
2210 iput(old_inode);
2211 mutex_unlock(&sb->s_sync_lock);
2212}
2213
2214static void __writeback_inodes_sb_nr(struct super_block *sb, unsigned long nr,
2215 enum wb_reason reason, bool skip_if_busy)
2216{
2217 DEFINE_WB_COMPLETION_ONSTACK(done);
2218 struct wb_writeback_work work = {
2219 .sb = sb,
2220 .sync_mode = WB_SYNC_NONE,
2221 .tagged_writepages = 1,
2222 .done = &done,
2223 .nr_pages = nr,
2224 .reason = reason,
2225 };
2226 struct backing_dev_info *bdi = sb->s_bdi;
2227
2228 if (!bdi_has_dirty_io(bdi) || bdi == &noop_backing_dev_info)
2229 return;
2230 WARN_ON(!rwsem_is_locked(&sb->s_umount));
2231
2232 bdi_split_work_to_wbs(sb->s_bdi, &work, skip_if_busy);
2233 wb_wait_for_completion(bdi, &done);
2234}

/**
 * writeback_inodes_sb_nr -	writeback dirty inodes from given super_block
 * @sb: the superblock
 * @nr: the number of pages to write
 * @reason: reason why some writeback work was initiated
 *
 * Start writeback on some inodes on this super_block. No guarantees are made
 * on how many (if any) will be written, and this function does not wait
 * for IO completion of submitted IO.
 */
2246void writeback_inodes_sb_nr(struct super_block *sb,
2247 unsigned long nr,
2248 enum wb_reason reason)
2249{
2250 __writeback_inodes_sb_nr(sb, nr, reason, false);
2251}
2252EXPORT_SYMBOL(writeback_inodes_sb_nr);

/**
 * writeback_inodes_sb	-	writeback dirty inodes from given super_block
 * @sb: the superblock
 * @reason: reason why some writeback work was initiated
 *
 * Start writeback on some inodes on this super_block. No guarantees are made
 * on how many (if any) will be written, and this function does not wait
 * for IO completion of submitted IO.
 */
2263void writeback_inodes_sb(struct super_block *sb, enum wb_reason reason)
2264{
2265 return writeback_inodes_sb_nr(sb, get_nr_dirty_pages(), reason);
2266}
2267EXPORT_SYMBOL(writeback_inodes_sb);

/**
 * try_to_writeback_inodes_sb_nr - try to start writeback if none underway
 * @sb: the superblock
 * @nr: the number of pages to write
 * @reason: the reason of writeback
 *
 * Invoke writeback_inodes_sb_nr if no writeback is currently underway.
 * Returns %true if writeback was started, %false if not.
 */
2278bool try_to_writeback_inodes_sb_nr(struct super_block *sb, unsigned long nr,
2279 enum wb_reason reason)
2280{
2281 if (!down_read_trylock(&sb->s_umount))
2282 return false;
2283
2284 __writeback_inodes_sb_nr(sb, nr, reason, true);
2285 up_read(&sb->s_umount);
2286 return true;
2287}
2288EXPORT_SYMBOL(try_to_writeback_inodes_sb_nr);

/**
 * try_to_writeback_inodes_sb - try to start writeback if none underway
 * @sb: the superblock
 * @reason: reason why some writeback work was initiated
 *
 * Implemented by try_to_writeback_inodes_sb_nr().
 * Returns %true if writeback was started, %false if not.
 */
2298bool try_to_writeback_inodes_sb(struct super_block *sb, enum wb_reason reason)
2299{
2300 return try_to_writeback_inodes_sb_nr(sb, get_nr_dirty_pages(), reason);
2301}
2302EXPORT_SYMBOL(try_to_writeback_inodes_sb);

/**
 * sync_inodes_sb	-	sync sb inode pages
 * @sb: the superblock
 *
 * This function writes and waits on any dirty inode belonging to this
 * super_block.
 */
2311void sync_inodes_sb(struct super_block *sb)
2312{
2313 DEFINE_WB_COMPLETION_ONSTACK(done);
2314 struct wb_writeback_work work = {
2315 .sb = sb,
2316 .sync_mode = WB_SYNC_ALL,
2317 .nr_pages = LONG_MAX,
2318 .range_cyclic = 0,
2319 .done = &done,
2320 .reason = WB_REASON_SYNC,
2321 .for_sync = 1,
2322 };
2323 struct backing_dev_info *bdi = sb->s_bdi;
2324
2325
2326
2327
2328
2329
2330 if (bdi == &noop_backing_dev_info)
2331 return;
2332 WARN_ON(!rwsem_is_locked(&sb->s_umount));
2333
2334 bdi_split_work_to_wbs(bdi, &work, false);
2335 wb_wait_for_completion(bdi, &done);
2336
2337 wait_sb_inodes(sb);
2338}
2339EXPORT_SYMBOL(sync_inodes_sb);

/**
 * write_inode_now	-	write an inode to disk
 * @inode: inode to write to disk
 * @sync: whether the write should be synchronous or not
 *
 * This function commits an inode to disk immediately if it is dirty. This is
 * primarily needed by knfsd.
 *
 * The caller must either have a ref on the inode or must have set I_WILL_FREE.
 */
2351int write_inode_now(struct inode *inode, int sync)
2352{
2353 struct writeback_control wbc = {
2354 .nr_to_write = LONG_MAX,
2355 .sync_mode = sync ? WB_SYNC_ALL : WB_SYNC_NONE,
2356 .range_start = 0,
2357 .range_end = LLONG_MAX,
2358 };
2359
2360 if (!mapping_cap_writeback_dirty(inode->i_mapping))
2361 wbc.nr_to_write = 0;
2362
2363 might_sleep();
2364 return writeback_single_inode(inode, &wbc);
2365}
2366EXPORT_SYMBOL(write_inode_now);

/**
 * sync_inode - write an inode and its pages to disk.
 * @inode: the inode to sync
 * @wbc: controls the writeback mode
 *
 * sync_inode() will write an inode and its pages to disk.  It will also
 * correctly update the inode on its superblock's dirty inode lists and will
 * update inode->i_state.
 *
 * The caller must have a ref on the inode.
 */
2379int sync_inode(struct inode *inode, struct writeback_control *wbc)
2380{
2381 return writeback_single_inode(inode, wbc);
2382}
2383EXPORT_SYMBOL(sync_inode);

/**
 * sync_inode_metadata - write an inode to disk
 * @inode: the inode to sync
 * @wait: wait for I/O to complete.
 *
 * Write an inode to disk and adjust its dirty state after completion.
 *
 * Note: only writes the actual inode, no associated data or other metadata.
 */
2394int sync_inode_metadata(struct inode *inode, int wait)
2395{
2396 struct writeback_control wbc = {
2397 .sync_mode = wait ? WB_SYNC_ALL : WB_SYNC_NONE,
2398 .nr_to_write = 0,
2399 };
2400
2401 return sync_inode(inode, &wbc);
2402}
2403EXPORT_SYMBOL(sync_inode_metadata);
2404