#include <linux/kernel.h>
#include <linux/sched/signal.h>
#include <linux/syscalls.h>
#include <linux/fs.h>
#include <linux/iomap.h>
#include <linux/mm.h>
#include <linux/percpu.h>
#include <linux/slab.h>
#include <linux/capability.h>
#include <linux/blkdev.h>
#include <linux/file.h>
#include <linux/quotaops.h>
#include <linux/highmem.h>
#include <linux/export.h>
#include <linux/backing-dev.h>
#include <linux/writeback.h>
#include <linux/hash.h>
#include <linux/suspend.h>
#include <linux/buffer_head.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/bio.h>
#include <linux/cpu.h>
#include <linux/bitops.h>
#include <linux/mpage.h>
#include <linux/bit_spinlock.h>
#include <linux/pagevec.h>
#include <linux/sched/mm.h>
#include <trace/events/block.h>

static int fsync_buffers_list(spinlock_t *lock, struct list_head *list);
static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh,
			 enum rw_hint hint, struct writeback_control *wbc);

#define BH_ENTRY(list) list_entry((list), struct buffer_head, b_assoc_buffers)

inline void touch_buffer(struct buffer_head *bh)
{
	trace_block_touch_buffer(bh);
	mark_page_accessed(bh->b_page);
}
EXPORT_SYMBOL(touch_buffer);

void __lock_buffer(struct buffer_head *bh)
{
	wait_on_bit_lock_io(&bh->b_state, BH_Lock, TASK_UNINTERRUPTIBLE);
}
EXPORT_SYMBOL(__lock_buffer);

void unlock_buffer(struct buffer_head *bh)
{
	clear_bit_unlock(BH_Lock, &bh->b_state);
	smp_mb__after_atomic();
	wake_up_bit(&bh->b_state, BH_Lock);
}
EXPORT_SYMBOL(unlock_buffer);
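
/*
 * Returns if the page has dirty or writeback buffers. If all the buffers
 * are unlocked and clean then the PageDirty information is stale. If
 * any of the buffers are locked, it is assumed they are locked for IO.
 */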
void buffer_check_dirty_writeback(struct page *page,
				     bool *dirty, bool *writeback)
{
	struct buffer_head *head, *bh;
	*dirty = false;
	*writeback = false;

	BUG_ON(!PageLocked(page));

	if (!page_has_buffers(page))
		return;

	if (PageWriteback(page))
		*writeback = true;

	head = page_buffers(page);
	bh = head;
	do {
		if (buffer_locked(bh))
			*writeback = true;

		if (buffer_dirty(bh))
			*dirty = true;

		bh = bh->b_this_page;
	} while (bh != head);
}
EXPORT_SYMBOL(buffer_check_dirty_writeback);
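
/*
 * Block until a buffer comes unlocked.  This doesn't stop it
 * from becoming locked again - you have to lock it yourself
 * if you want to preserve its state.
 */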
void __wait_on_buffer(struct buffer_head * bh)
{
	wait_on_bit_io(&bh->b_state, BH_Lock, TASK_UNINTERRUPTIBLE);
}
EXPORT_SYMBOL(__wait_on_buffer);

static void
__clear_page_buffers(struct page *page)
{
	ClearPagePrivate(page);
	set_page_private(page, 0);
	put_page(page);
}

static void buffer_io_error(struct buffer_head *bh, char *msg)
{
	if (!test_bit(BH_Quiet, &bh->b_state))
		printk_ratelimited(KERN_ERR
			"Buffer I/O error on dev %pg, logical block %llu%s\n",
			bh->b_bdev, (unsigned long long)bh->b_blocknr, msg);
}
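
/*
 * End-of-IO handler helper function which does not touch the bh after
 * unlocking it.
 * Note: unlock_buffer() sort-of touches the bh after unlocking it, but
 * a race there is benign: unlock_buffer() only uses the bh's address for
 * hashing after unlocking the buffer, so it doesn't actually touch the bh
 * itself.
 */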
static void __end_buffer_read_notouch(struct buffer_head *bh, int uptodate)
{
	if (uptodate) {
		set_buffer_uptodate(bh);
	} else {
		clear_buffer_uptodate(bh);
	}
	unlock_buffer(bh);
}

void end_buffer_read_sync(struct buffer_head *bh, int uptodate)
{
	__end_buffer_read_notouch(bh, uptodate);
	put_bh(bh);
}
EXPORT_SYMBOL(end_buffer_read_sync);

void end_buffer_write_sync(struct buffer_head *bh, int uptodate)
{
	if (uptodate) {
		set_buffer_uptodate(bh);
	} else {
		buffer_io_error(bh, ", lost sync page write");
		mark_buffer_write_io_error(bh);
		clear_buffer_uptodate(bh);
	}
	unlock_buffer(bh);
	put_bh(bh);
}
EXPORT_SYMBOL(end_buffer_write_sync);
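
/*
 * Look up a buffer in the block device's page cache without using the
 * per-CPU LRU.  The page lock normally protects a page's buffers, but this
 * path must not block, so exclusion from try_to_free_buffers() is provided
 * by the block device mapping's private_lock instead.
 */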
192static struct buffer_head *
193__find_get_block_slow(struct block_device *bdev, sector_t block)
194{
195 struct inode *bd_inode = bdev->bd_inode;
196 struct address_space *bd_mapping = bd_inode->i_mapping;
197 struct buffer_head *ret = NULL;
198 pgoff_t index;
199 struct buffer_head *bh;
200 struct buffer_head *head;
201 struct page *page;
202 int all_mapped = 1;
203 static DEFINE_RATELIMIT_STATE(last_warned, HZ, 1);
204
205 index = block >> (PAGE_SHIFT - bd_inode->i_blkbits);
206 page = find_get_page_flags(bd_mapping, index, FGP_ACCESSED);
207 if (!page)
208 goto out;
209
210 spin_lock(&bd_mapping->private_lock);
211 if (!page_has_buffers(page))
212 goto out_unlock;
213 head = page_buffers(page);
214 bh = head;
215 do {
216 if (!buffer_mapped(bh))
217 all_mapped = 0;
218 else if (bh->b_blocknr == block) {
219 ret = bh;
220 get_bh(bh);
221 goto out_unlock;
222 }
223 bh = bh->b_this_page;
224 } while (bh != head);
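
	/*
	 * All of the buffers on this page are mapped, yet none of them
	 * matched the block we were asked for.  That should not happen and
	 * usually indicates aliasing between the filesystem and the block
	 * device mapping (e.g. a blocksize mismatch), so report it, rate
	 * limited.
	 */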
231 ratelimit_set_flags(&last_warned, RATELIMIT_MSG_ON_RELEASE);
232 if (all_mapped && __ratelimit(&last_warned)) {
233 printk("__find_get_block_slow() failed. block=%llu, "
234 "b_blocknr=%llu, b_state=0x%08lx, b_size=%zu, "
235 "device %pg blocksize: %d\n",
236 (unsigned long long)block,
237 (unsigned long long)bh->b_blocknr,
238 bh->b_state, bh->b_size, bdev,
239 1 << bd_inode->i_blkbits);
240 }
241out_unlock:
242 spin_unlock(&bd_mapping->private_lock);
243 put_page(page);
244out:
245 return ret;
246}
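
/*
 * I/O completion handler for block_read_full_page() - pages
 * which come unlocked at the end of I/O.
 */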
252static void end_buffer_async_read(struct buffer_head *bh, int uptodate)
253{
254 unsigned long flags;
255 struct buffer_head *first;
256 struct buffer_head *tmp;
257 struct page *page;
258 int page_uptodate = 1;
259
260 BUG_ON(!buffer_async_read(bh));
261
262 page = bh->b_page;
263 if (uptodate) {
264 set_buffer_uptodate(bh);
265 } else {
266 clear_buffer_uptodate(bh);
267 buffer_io_error(bh, ", async page read");
268 SetPageError(page);
269 }
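
	/*
	 * Clearing this buffer's async_read bit and inspecting the state of
	 * the page's other buffers must be done atomically with respect to
	 * the other buffers' completion handlers, so serialise on
	 * BH_Uptodate_Lock of the first buffer with interrupts disabled
	 * (completion may run from interrupt context).
	 */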
276 first = page_buffers(page);
277 local_irq_save(flags);
278 bit_spin_lock(BH_Uptodate_Lock, &first->b_state);
279 clear_buffer_async_read(bh);
280 unlock_buffer(bh);
281 tmp = bh;
282 do {
283 if (!buffer_uptodate(tmp))
284 page_uptodate = 0;
285 if (buffer_async_read(tmp)) {
286 BUG_ON(!buffer_locked(tmp));
287 goto still_busy;
288 }
289 tmp = tmp->b_this_page;
290 } while (tmp != bh);
291 bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
292 local_irq_restore(flags);
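
	/*
	 * If none of the buffers had errors and they are all
	 * uptodate then we can set the page uptodate.
	 */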
298 if (page_uptodate && !PageError(page))
299 SetPageUptodate(page);
300 unlock_page(page);
301 return;
302
303still_busy:
304 bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
305 local_irq_restore(flags);
306 return;
307}
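
/*
 * Completion handler for block_write_full_page() - pages which are unlocked
 * during I/O, and which have PageWriteback cleared upon I/O completion.
 */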
313void end_buffer_async_write(struct buffer_head *bh, int uptodate)
314{
315 unsigned long flags;
316 struct buffer_head *first;
317 struct buffer_head *tmp;
318 struct page *page;
319
320 BUG_ON(!buffer_async_write(bh));
321
322 page = bh->b_page;
323 if (uptodate) {
324 set_buffer_uptodate(bh);
325 } else {
326 buffer_io_error(bh, ", lost async page write");
327 mark_buffer_write_io_error(bh);
328 clear_buffer_uptodate(bh);
329 SetPageError(page);
330 }
331
332 first = page_buffers(page);
333 local_irq_save(flags);
334 bit_spin_lock(BH_Uptodate_Lock, &first->b_state);
335
336 clear_buffer_async_write(bh);
337 unlock_buffer(bh);
338 tmp = bh->b_this_page;
339 while (tmp != bh) {
340 if (buffer_async_write(tmp)) {
341 BUG_ON(!buffer_locked(tmp));
342 goto still_busy;
343 }
344 tmp = tmp->b_this_page;
345 }
346 bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
347 local_irq_restore(flags);
348 end_page_writeback(page);
349 return;
350
351still_busy:
352 bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
353 local_irq_restore(flags);
354 return;
355}
356EXPORT_SYMBOL(end_buffer_async_write);
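
/*
 * The async completion handlers above rely on BH_Async_Read/BH_Async_Write
 * to tell which buffers of a page are still under I/O: the page is only
 * unlocked (or its writeback ended) once no such buffer remains.  The page
 * lock prevents anyone from starting new async reads against the page, and
 * PageWriteback prevents simultaneous writeout of the same page.
 */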
379static void mark_buffer_async_read(struct buffer_head *bh)
380{
381 bh->b_end_io = end_buffer_async_read;
382 set_buffer_async_read(bh);
383}
384
385static void mark_buffer_async_write_endio(struct buffer_head *bh,
386 bh_end_io_t *handler)
387{
388 bh->b_end_io = handler;
389 set_buffer_async_write(bh);
390}
391
392void mark_buffer_async_write(struct buffer_head *bh)
393{
394 mark_buffer_async_write_endio(bh, end_buffer_async_write);
395}
396EXPORT_SYMBOL(mark_buffer_async_write);
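
/*
 * Buffers on an address_space's ->private_list are "associated" with it for
 * fsync purposes: typically filesystem metadata (such as indirect blocks)
 * that lives in the block device mapping but must be written out when the
 * file is fsynced.  b_assoc_map points back at the owning address_space and
 * the list is protected by the private_lock of the buffer's own (block
 * device) mapping.  mark_buffer_dirty_inode(), sync_mapping_buffers() and
 * invalidate_inode_buffers() are the main entry points.
 */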
451static void __remove_assoc_queue(struct buffer_head *bh)
452{
453 list_del_init(&bh->b_assoc_buffers);
454 WARN_ON(!bh->b_assoc_map);
455 bh->b_assoc_map = NULL;
456}
457
458int inode_has_buffers(struct inode *inode)
459{
460 return !list_empty(&inode->i_data.private_list);
461}
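
/*
 * osync is designed to support O_SYNC io.  It waits synchronously for
 * all already-submitted IO to complete, but does not queue any new
 * writes to the disk.
 */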
473static int osync_buffers_list(spinlock_t *lock, struct list_head *list)
474{
475 struct buffer_head *bh;
476 struct list_head *p;
477 int err = 0;
478
479 spin_lock(lock);
480repeat:
481 list_for_each_prev(p, list) {
482 bh = BH_ENTRY(p);
483 if (buffer_locked(bh)) {
484 get_bh(bh);
485 spin_unlock(lock);
486 wait_on_buffer(bh);
487 if (!buffer_uptodate(bh))
488 err = -EIO;
489 brelse(bh);
490 spin_lock(lock);
491 goto repeat;
492 }
493 }
494 spin_unlock(lock);
495 return err;
496}
497
498void emergency_thaw_bdev(struct super_block *sb)
499{
500 while (sb->s_bdev && !thaw_bdev(sb->s_bdev, sb))
501 printk(KERN_WARNING "Emergency Thaw on %pg\n", sb->s_bdev);
502}
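
/**
 * sync_mapping_buffers - write out & wait upon a mapping's "associated" buffers
 * @mapping: the mapping which wants those buffers written
 *
 * Starts I/O against the buffers at mapping->private_list, and waits upon
 * that I/O.
 *
 * Basically, this is a convenience function for fsync().  @mapping is a file
 * or directory which needs those buffers to be written for a successful
 * fsync().
 */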
515int sync_mapping_buffers(struct address_space *mapping)
516{
517 struct address_space *buffer_mapping = mapping->private_data;
518
519 if (buffer_mapping == NULL || list_empty(&mapping->private_list))
520 return 0;
521
522 return fsync_buffers_list(&buffer_mapping->private_lock,
523 &mapping->private_list);
524}
525EXPORT_SYMBOL(sync_mapping_buffers);
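
/*
 * Called when we've recently written block `bblock', and it is known that
 * `bblock' was for a buffer_boundary() buffer.  This means that the block at
 * `bblock + 1' is probably a dirty indirect block.  Hunt it down and, if it's
 * dirty, schedule it for IO.  So that indirects merge nicely with their data.
 */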
533void write_boundary_block(struct block_device *bdev,
534 sector_t bblock, unsigned blocksize)
535{
536 struct buffer_head *bh = __find_get_block(bdev, bblock + 1, blocksize);
537 if (bh) {
538 if (buffer_dirty(bh))
539 ll_rw_block(REQ_OP_WRITE, 0, 1, &bh);
540 put_bh(bh);
541 }
542}
543
544void mark_buffer_dirty_inode(struct buffer_head *bh, struct inode *inode)
545{
546 struct address_space *mapping = inode->i_mapping;
547 struct address_space *buffer_mapping = bh->b_page->mapping;
548
549 mark_buffer_dirty(bh);
550 if (!mapping->private_data) {
551 mapping->private_data = buffer_mapping;
552 } else {
553 BUG_ON(mapping->private_data != buffer_mapping);
554 }
555 if (!bh->b_assoc_map) {
556 spin_lock(&buffer_mapping->private_lock);
557 list_move_tail(&bh->b_assoc_buffers,
558 &mapping->private_list);
559 bh->b_assoc_map = mapping;
560 spin_unlock(&buffer_mapping->private_lock);
561 }
562}
563EXPORT_SYMBOL(mark_buffer_dirty_inode);
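
/*
 * Mark the page dirty: account it and tag it dirty in the mapping's i_pages
 * tree.  If @warn is set, warn if the page is not uptodate (and has not been
 * truncated away in the meantime).
 *
 * The caller must hold lock_page_memcg().
 */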
574void __set_page_dirty(struct page *page, struct address_space *mapping,
575 int warn)
576{
577 unsigned long flags;
578
579 xa_lock_irqsave(&mapping->i_pages, flags);
580 if (page->mapping) {
581 WARN_ON_ONCE(warn && !PageUptodate(page));
582 account_page_dirtied(page, mapping);
583 __xa_set_mark(&mapping->i_pages, page_index(page),
584 PAGECACHE_TAG_DIRTY);
585 }
586 xa_unlock_irqrestore(&mapping->i_pages, flags);
587}
588EXPORT_SYMBOL_GPL(__set_page_dirty);
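
/*
 * ->set_page_dirty() implementation for buffer-backed pages.
 *
 * If the page has buffers, all of them are marked dirty first and only then
 * is the page itself dirtied, so writeback never sees a dirty page whose
 * buffers all appear clean.  mapping->private_lock excludes
 * try_to_free_buffers() while the buffer list is walked.
 */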
615int __set_page_dirty_buffers(struct page *page)
616{
617 int newly_dirty;
618 struct address_space *mapping = page_mapping(page);
619
620 if (unlikely(!mapping))
621 return !TestSetPageDirty(page);
622
623 spin_lock(&mapping->private_lock);
624 if (page_has_buffers(page)) {
625 struct buffer_head *head = page_buffers(page);
626 struct buffer_head *bh = head;
627
628 do {
629 set_buffer_dirty(bh);
630 bh = bh->b_this_page;
631 } while (bh != head);
632 }
633
634
635
636
637 lock_page_memcg(page);
638 newly_dirty = !TestSetPageDirty(page);
639 spin_unlock(&mapping->private_lock);
640
641 if (newly_dirty)
642 __set_page_dirty(page, mapping, 1);
643
644 unlock_page_memcg(page);
645
646 if (newly_dirty)
647 __mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
648
649 return newly_dirty;
650}
651EXPORT_SYMBOL(__set_page_dirty_buffers);
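
/*
 * Write out and wait upon a list of buffers (an address_space's
 * ->private_list).
 *
 * This is done in two stages: first, dirty buffers are moved to a temporary
 * list and their writes are queued under a block plug; then we go back and
 * wait for those writes to complete.  Buffers that get redirtied while we
 * wait are put back on the original list so that a later fsync sees them,
 * and the final osync_buffers_list() pass waits on anything that was already
 * under I/O when we started.
 */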
672static int fsync_buffers_list(spinlock_t *lock, struct list_head *list)
673{
674 struct buffer_head *bh;
675 struct list_head tmp;
676 struct address_space *mapping;
677 int err = 0, err2;
678 struct blk_plug plug;
679
680 INIT_LIST_HEAD(&tmp);
681 blk_start_plug(&plug);
682
683 spin_lock(lock);
684 while (!list_empty(list)) {
685 bh = BH_ENTRY(list->next);
686 mapping = bh->b_assoc_map;
687 __remove_assoc_queue(bh);
688
689
690 smp_mb();
691 if (buffer_dirty(bh) || buffer_locked(bh)) {
692 list_add(&bh->b_assoc_buffers, &tmp);
693 bh->b_assoc_map = mapping;
694 if (buffer_dirty(bh)) {
695 get_bh(bh);
696 spin_unlock(lock);
704 write_dirty_buffer(bh, REQ_SYNC);
712 brelse(bh);
713 spin_lock(lock);
714 }
715 }
716 }
717
718 spin_unlock(lock);
719 blk_finish_plug(&plug);
720 spin_lock(lock);
721
722 while (!list_empty(&tmp)) {
723 bh = BH_ENTRY(tmp.prev);
724 get_bh(bh);
725 mapping = bh->b_assoc_map;
726 __remove_assoc_queue(bh);
727
728
729 smp_mb();
730 if (buffer_dirty(bh)) {
731 list_add(&bh->b_assoc_buffers,
732 &mapping->private_list);
733 bh->b_assoc_map = mapping;
734 }
735 spin_unlock(lock);
736 wait_on_buffer(bh);
737 if (!buffer_uptodate(bh))
738 err = -EIO;
739 brelse(bh);
740 spin_lock(lock);
741 }
742
743 spin_unlock(lock);
744 err2 = osync_buffers_list(lock, list);
745 if (err)
746 return err;
747 else
748 return err2;
749}
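
/*
 * Invalidate any and all dirty buffers on a given inode.  We are
 * probably unmounting the fs, but that doesn't mean we have to hit
 * the disk at this point: simply detach the buffers from the inode's
 * ->private_list.
 */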
760void invalidate_inode_buffers(struct inode *inode)
761{
762 if (inode_has_buffers(inode)) {
763 struct address_space *mapping = &inode->i_data;
764 struct list_head *list = &mapping->private_list;
765 struct address_space *buffer_mapping = mapping->private_data;
766
767 spin_lock(&buffer_mapping->private_lock);
768 while (!list_empty(list))
769 __remove_assoc_queue(BH_ENTRY(list->next));
770 spin_unlock(&buffer_mapping->private_lock);
771 }
772}
773EXPORT_SYMBOL(invalidate_inode_buffers);
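
/*
 * Remove any clean buffers from the inode's buffer list.  This is called
 * when we're trying to free the inode itself.  Those buffers can pin it.
 *
 * Returns true if all buffers were removed.
 */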
781int remove_inode_buffers(struct inode *inode)
782{
783 int ret = 1;
784
785 if (inode_has_buffers(inode)) {
786 struct address_space *mapping = &inode->i_data;
787 struct list_head *list = &mapping->private_list;
788 struct address_space *buffer_mapping = mapping->private_data;
789
790 spin_lock(&buffer_mapping->private_lock);
791 while (!list_empty(list)) {
792 struct buffer_head *bh = BH_ENTRY(list->next);
793 if (buffer_dirty(bh)) {
794 ret = 0;
795 break;
796 }
797 __remove_assoc_queue(bh);
798 }
799 spin_unlock(&buffer_mapping->private_lock);
800 }
801 return ret;
802}
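
/*
 * Create the appropriate buffers when given a page for data area and
 * the size of each buffer.  Use the bh->b_this_page linked list to
 * follow the buffers created.  Return NULL if unable to create more
 * buffers.
 *
 * When @retry is true the allocation is not allowed to fail (__GFP_NOFAIL);
 * this is used for async IO (paging, swapping) which cannot tolerate failure.
 */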
813struct buffer_head *alloc_page_buffers(struct page *page, unsigned long size,
814 bool retry)
815{
816 struct buffer_head *bh, *head;
817 gfp_t gfp = GFP_NOFS | __GFP_ACCOUNT;
818 long offset;
819 struct mem_cgroup *memcg;
820
821 if (retry)
822 gfp |= __GFP_NOFAIL;
823
824 memcg = get_mem_cgroup_from_page(page);
825 memalloc_use_memcg(memcg);
826
827 head = NULL;
828 offset = PAGE_SIZE;
829 while ((offset -= size) >= 0) {
830 bh = alloc_buffer_head(gfp);
831 if (!bh)
832 goto no_grow;
833
834 bh->b_this_page = head;
835 bh->b_blocknr = -1;
836 head = bh;
837
838 bh->b_size = size;
839
840
841 set_bh_page(bh, page, offset);
842 }
843out:
844 memalloc_unuse_memcg();
845 mem_cgroup_put(memcg);
846 return head;
847
848
849
850no_grow:
851 if (head) {
852 do {
853 bh = head;
854 head = head->b_this_page;
855 free_buffer_head(bh);
856 } while (head);
857 }
858
859 goto out;
860}
861EXPORT_SYMBOL_GPL(alloc_page_buffers);
862
863static inline void
864link_dev_buffers(struct page *page, struct buffer_head *head)
865{
866 struct buffer_head *bh, *tail;
867
868 bh = head;
869 do {
870 tail = bh;
871 bh = bh->b_this_page;
872 } while (bh);
873 tail->b_this_page = head;
874 attach_page_buffers(page, head);
875}
876
877static sector_t blkdev_max_block(struct block_device *bdev, unsigned int size)
878{
879 sector_t retval = ~((sector_t)0);
880 loff_t sz = i_size_read(bdev->bd_inode);
881
882 if (sz) {
883 unsigned int sizebits = blksize_bits(size);
884 retval = (sz >> sizebits);
885 }
886 return retval;
887}
888
889
890
891
892static sector_t
893init_page_buffers(struct page *page, struct block_device *bdev,
894 sector_t block, int size)
895{
896 struct buffer_head *head = page_buffers(page);
897 struct buffer_head *bh = head;
898 int uptodate = PageUptodate(page);
899 sector_t end_block = blkdev_max_block(I_BDEV(bdev->bd_inode), size);
900
901 do {
902 if (!buffer_mapped(bh)) {
903 bh->b_end_io = NULL;
904 bh->b_private = NULL;
905 bh->b_bdev = bdev;
906 bh->b_blocknr = block;
907 if (uptodate)
908 set_buffer_uptodate(bh);
909 if (block < end_block)
910 set_buffer_mapped(bh);
911 }
912 block++;
913 bh = bh->b_this_page;
914 } while (bh != head);
915
916
917
918
919 return end_block;
920}
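
/*
 * Create the page-cache page that contains the requested block.
 *
 * This is used purely for blockdev mappings.
 */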
927static int
928grow_dev_page(struct block_device *bdev, sector_t block,
929 pgoff_t index, int size, int sizebits, gfp_t gfp)
930{
931 struct inode *inode = bdev->bd_inode;
932 struct page *page;
933 struct buffer_head *bh;
934 sector_t end_block;
935 int ret = 0;
936 gfp_t gfp_mask;
937
938 gfp_mask = mapping_gfp_constraint(inode->i_mapping, ~__GFP_FS) | gfp;
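
	/*
	 * __getblk_slow() cannot really deal with allocation failure and
	 * would loop forever around its improvised retry path, so prefer
	 * looping inside the allocator instead.
	 */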
946 gfp_mask |= __GFP_NOFAIL;
947
948 page = find_or_create_page(inode->i_mapping, index, gfp_mask);
949
950 BUG_ON(!PageLocked(page));
951
952 if (page_has_buffers(page)) {
953 bh = page_buffers(page);
954 if (bh->b_size == size) {
955 end_block = init_page_buffers(page, bdev,
956 (sector_t)index << sizebits,
957 size);
958 goto done;
959 }
960 if (!try_to_free_buffers(page))
961 goto failed;
962 }
963
964
965
966
967 bh = alloc_page_buffers(page, size, true);
968
969
970
971
972
973
974 spin_lock(&inode->i_mapping->private_lock);
975 link_dev_buffers(page, bh);
976 end_block = init_page_buffers(page, bdev, (sector_t)index << sizebits,
977 size);
978 spin_unlock(&inode->i_mapping->private_lock);
979done:
980 ret = (block < end_block) ? 1 : -ENXIO;
981failed:
982 unlock_page(page);
983 put_page(page);
984 return ret;
985}
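
/*
 * Create buffers for the specified block device block's page.  If
 * that page was dirty, the buffers are set dirty also.
 */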
991static int
992grow_buffers(struct block_device *bdev, sector_t block, int size, gfp_t gfp)
993{
994 pgoff_t index;
995 int sizebits;
996
997 sizebits = -1;
998 do {
999 sizebits++;
1000 } while ((size << sizebits) < PAGE_SIZE);
1001
1002 index = block >> sizebits;
1003
1004
1005
1006
1007
1008 if (unlikely(index != block >> sizebits)) {
1009 printk(KERN_ERR "%s: requested out-of-range block %llu for "
1010 "device %pg\n",
1011 __func__, (unsigned long long)block,
1012 bdev);
1013 return -EIO;
1014 }
1015
1016
1017 return grow_dev_page(bdev, block, index, size, sizebits, gfp);
1018}
1019
1020static struct buffer_head *
1021__getblk_slow(struct block_device *bdev, sector_t block,
1022 unsigned size, gfp_t gfp)
1023{
1024
1025 if (unlikely(size & (bdev_logical_block_size(bdev)-1) ||
1026 (size < 512 || size > PAGE_SIZE))) {
1027 printk(KERN_ERR "getblk(): invalid block size %d requested\n",
1028 size);
1029 printk(KERN_ERR "logical block size: %d\n",
1030 bdev_logical_block_size(bdev));
1031
1032 dump_stack();
1033 return NULL;
1034 }
1035
1036 for (;;) {
1037 struct buffer_head *bh;
1038 int ret;
1039
1040 bh = __find_get_block(bdev, block, size);
1041 if (bh)
1042 return bh;
1043
1044 ret = grow_buffers(bdev, block, size, gfp);
1045 if (ret < 0)
1046 return NULL;
1047 }
1048}
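
/**
 * mark_buffer_dirty - mark a buffer_head as needing writeout
 * @bh: the buffer_head to mark dirty
 *
 * mark_buffer_dirty() will set the dirty bit against the buffer, then set
 * its backing page dirty, then tag the page as dirty in the page cache
 * and then attach the address_space's inode to its superblock's dirty
 * inode list.
 */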
1085void mark_buffer_dirty(struct buffer_head *bh)
1086{
1087 WARN_ON_ONCE(!buffer_uptodate(bh));
1088
1089 trace_block_dirty_buffer(bh);
1090
1091
1092
1093
1094
1095
1096
1097 if (buffer_dirty(bh)) {
1098 smp_mb();
1099 if (buffer_dirty(bh))
1100 return;
1101 }
1102
1103 if (!test_set_buffer_dirty(bh)) {
1104 struct page *page = bh->b_page;
1105 struct address_space *mapping = NULL;
1106
1107 lock_page_memcg(page);
1108 if (!TestSetPageDirty(page)) {
1109 mapping = page_mapping(page);
1110 if (mapping)
1111 __set_page_dirty(page, mapping, 0);
1112 }
1113 unlock_page_memcg(page);
1114 if (mapping)
1115 __mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
1116 }
1117}
1118EXPORT_SYMBOL(mark_buffer_dirty);
1119
1120void mark_buffer_write_io_error(struct buffer_head *bh)
1121{
1122 set_buffer_write_io_error(bh);
1123
1124 if (bh->b_page && bh->b_page->mapping)
1125 mapping_set_error(bh->b_page->mapping, -EIO);
1126 if (bh->b_assoc_map)
1127 mapping_set_error(bh->b_assoc_map, -EIO);
1128}
1129EXPORT_SYMBOL(mark_buffer_write_io_error);
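
/*
 * Decrement a buffer_head's reference count.  If all buffers against a page
 * have zero reference count, are clean and unlocked, and if the page is clean
 * and unlocked then try_to_free_buffers() may strip the buffers from the page
 * in preparation for freeing it (sometimes, rarely, buffers are removed from
 * a page but it ends up not being freed, and buffers may later be reattached).
 */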
1138void __brelse(struct buffer_head * buf)
1139{
1140 if (atomic_read(&buf->b_count)) {
1141 put_bh(buf);
1142 return;
1143 }
1144 WARN(1, KERN_ERR "VFS: brelse: Trying to free free buffer\n");
1145}
1146EXPORT_SYMBOL(__brelse);
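
/*
 * bforget() is like brelse(), except it discards any
 * potentially dirty data.
 */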
1152void __bforget(struct buffer_head *bh)
1153{
1154 clear_buffer_dirty(bh);
1155 if (bh->b_assoc_map) {
1156 struct address_space *buffer_mapping = bh->b_page->mapping;
1157
1158 spin_lock(&buffer_mapping->private_lock);
1159 list_del_init(&bh->b_assoc_buffers);
1160 bh->b_assoc_map = NULL;
1161 spin_unlock(&buffer_mapping->private_lock);
1162 }
1163 __brelse(bh);
1164}
1165EXPORT_SYMBOL(__bforget);
1166
1167static struct buffer_head *__bread_slow(struct buffer_head *bh)
1168{
1169 lock_buffer(bh);
1170 if (buffer_uptodate(bh)) {
1171 unlock_buffer(bh);
1172 return bh;
1173 } else {
1174 get_bh(bh);
1175 bh->b_end_io = end_buffer_read_sync;
1176 submit_bh(REQ_OP_READ, 0, bh);
1177 wait_on_buffer(bh);
1178 if (buffer_uptodate(bh))
1179 return bh;
1180 }
1181 brelse(bh);
1182 return NULL;
1183}
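
/*
 * Per-CPU cache of recently used buffer_heads, kept to make
 * __find_get_block() cheap.  bhs[0] is the most recently used entry, and
 * every buffer in the array holds an extra reference that is dropped when
 * the buffer is evicted from the LRU.
 */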
#define BH_LRU_SIZE	16

struct bh_lru {
	struct buffer_head *bhs[BH_LRU_SIZE];
};

static DEFINE_PER_CPU(struct bh_lru, bh_lrus) = {{ NULL }};

#ifdef CONFIG_SMP
#define bh_lru_lock()	local_irq_disable()
#define bh_lru_unlock()	local_irq_enable()
#else
#define bh_lru_lock()	preempt_disable()
#define bh_lru_unlock()	preempt_enable()
#endif

static inline void check_irqs_on(void)
{
#ifdef irqs_disabled
	BUG_ON(irqs_disabled());
#endif
}
1221
1222
1223
1224
1225
1226
1227static void bh_lru_install(struct buffer_head *bh)
1228{
1229 struct buffer_head *evictee = bh;
1230 struct bh_lru *b;
1231 int i;
1232
1233 check_irqs_on();
1234 bh_lru_lock();
1235
1236 b = this_cpu_ptr(&bh_lrus);
1237 for (i = 0; i < BH_LRU_SIZE; i++) {
1238 swap(evictee, b->bhs[i]);
1239 if (evictee == bh) {
1240 bh_lru_unlock();
1241 return;
1242 }
1243 }
1244
1245 get_bh(bh);
1246 bh_lru_unlock();
1247 brelse(evictee);
1248}
1249
1250
1251
1252
1253static struct buffer_head *
1254lookup_bh_lru(struct block_device *bdev, sector_t block, unsigned size)
1255{
1256 struct buffer_head *ret = NULL;
1257 unsigned int i;
1258
1259 check_irqs_on();
1260 bh_lru_lock();
1261 for (i = 0; i < BH_LRU_SIZE; i++) {
1262 struct buffer_head *bh = __this_cpu_read(bh_lrus.bhs[i]);
1263
1264 if (bh && bh->b_blocknr == block && bh->b_bdev == bdev &&
1265 bh->b_size == size) {
1266 if (i) {
1267 while (i) {
1268 __this_cpu_write(bh_lrus.bhs[i],
1269 __this_cpu_read(bh_lrus.bhs[i - 1]));
1270 i--;
1271 }
1272 __this_cpu_write(bh_lrus.bhs[0], bh);
1273 }
1274 get_bh(bh);
1275 ret = bh;
1276 break;
1277 }
1278 }
1279 bh_lru_unlock();
1280 return ret;
1281}
1282
1283
1284
1285
1286
1287
1288struct buffer_head *
1289__find_get_block(struct block_device *bdev, sector_t block, unsigned size)
1290{
1291 struct buffer_head *bh = lookup_bh_lru(bdev, block, size);
1292
1293 if (bh == NULL) {
1294
1295 bh = __find_get_block_slow(bdev, block);
1296 if (bh)
1297 bh_lru_install(bh);
1298 } else
1299 touch_buffer(bh);
1300
1301 return bh;
1302}
1303EXPORT_SYMBOL(__find_get_block);
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313struct buffer_head *
1314__getblk_gfp(struct block_device *bdev, sector_t block,
1315 unsigned size, gfp_t gfp)
1316{
1317 struct buffer_head *bh = __find_get_block(bdev, block, size);
1318
1319 might_sleep();
1320 if (bh == NULL)
1321 bh = __getblk_slow(bdev, block, size, gfp);
1322 return bh;
1323}
1324EXPORT_SYMBOL(__getblk_gfp);
1325
1326
1327
1328
1329void __breadahead(struct block_device *bdev, sector_t block, unsigned size)
1330{
1331 struct buffer_head *bh = __getblk(bdev, block, size);
1332 if (likely(bh)) {
1333 ll_rw_block(REQ_OP_READ, REQ_RAHEAD, 1, &bh);
1334 brelse(bh);
1335 }
1336}
1337EXPORT_SYMBOL(__breadahead);
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351struct buffer_head *
1352__bread_gfp(struct block_device *bdev, sector_t block,
1353 unsigned size, gfp_t gfp)
1354{
1355 struct buffer_head *bh = __getblk_gfp(bdev, block, size, gfp);
1356
1357 if (likely(bh) && !buffer_uptodate(bh))
1358 bh = __bread_slow(bh);
1359 return bh;
1360}
1361EXPORT_SYMBOL(__bread_gfp);
1362
1363
1364
1365
1366
1367
1368static void invalidate_bh_lru(void *arg)
1369{
1370 struct bh_lru *b = &get_cpu_var(bh_lrus);
1371 int i;
1372
1373 for (i = 0; i < BH_LRU_SIZE; i++) {
1374 brelse(b->bhs[i]);
1375 b->bhs[i] = NULL;
1376 }
1377 put_cpu_var(bh_lrus);
1378}
1379
static bool has_bh_in_lru(int cpu, void *dummy)
{
	struct bh_lru *b = per_cpu_ptr(&bh_lrus, cpu);
	int i;

	for (i = 0; i < BH_LRU_SIZE; i++) {
		if (b->bhs[i])
			return true;
	}

	return false;
}
1392
1393void invalidate_bh_lrus(void)
1394{
1395 on_each_cpu_cond(has_bh_in_lru, invalidate_bh_lru, NULL, 1, GFP_KERNEL);
1396}
1397EXPORT_SYMBOL_GPL(invalidate_bh_lrus);
1398
1399void set_bh_page(struct buffer_head *bh,
1400 struct page *page, unsigned long offset)
1401{
1402 bh->b_page = page;
1403 BUG_ON(offset >= PAGE_SIZE);
1404 if (PageHighMem(page))
1405
1406
1407
1408 bh->b_data = (char *)(0 + offset);
1409 else
1410 bh->b_data = page_address(page) + offset;
1411}
1412EXPORT_SYMBOL(set_bh_page);
1413
1414
1415
1416
1417
1418
1419#define BUFFER_FLAGS_DISCARD \
1420 (1 << BH_Mapped | 1 << BH_New | 1 << BH_Req | \
1421 1 << BH_Delay | 1 << BH_Unwritten)
1422
1423static void discard_buffer(struct buffer_head * bh)
1424{
1425 unsigned long b_state, b_state_old;
1426
1427 lock_buffer(bh);
1428 clear_buffer_dirty(bh);
1429 bh->b_bdev = NULL;
1430 b_state = bh->b_state;
1431 for (;;) {
1432 b_state_old = cmpxchg(&bh->b_state, b_state,
1433 (b_state & ~BUFFER_FLAGS_DISCARD));
1434 if (b_state_old == b_state)
1435 break;
1436 b_state = b_state_old;
1437 }
1438 unlock_buffer(bh);
1439}
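
/**
 * block_invalidatepage - invalidate part or all of a buffer-backed page
 * @page: the page which is affected
 * @offset: start of the range to invalidate
 * @length: length of the range to invalidate
 *
 * block_invalidatepage() is called when all or part of the page has become
 * invalidated by a truncate operation.
 *
 * block_invalidatepage() does not have to release all buffers, but it must
 * ensure that no dirty buffer is left outside @offset and that no I/O
 * is underway against any of the blocks which are outside the truncation
 * point.  Because the caller is about to free (and possibly reuse) those
 * blocks on-disk.
 */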
1457void block_invalidatepage(struct page *page, unsigned int offset,
1458 unsigned int length)
1459{
1460 struct buffer_head *head, *bh, *next;
1461 unsigned int curr_off = 0;
1462 unsigned int stop = length + offset;
1463
1464 BUG_ON(!PageLocked(page));
1465 if (!page_has_buffers(page))
1466 goto out;
1467
1468
1469
1470
1471 BUG_ON(stop > PAGE_SIZE || stop < length);
1472
1473 head = page_buffers(page);
1474 bh = head;
1475 do {
1476 unsigned int next_off = curr_off + bh->b_size;
1477 next = bh->b_this_page;
1478
1479
1480
1481
1482 if (next_off > stop)
1483 goto out;
1484
1485
1486
1487
1488 if (offset <= curr_off)
1489 discard_buffer(bh);
1490 curr_off = next_off;
1491 bh = next;
1492 } while (bh != head);
1493
1494
1495
1496
1497
1498
1499 if (length == PAGE_SIZE)
1500 try_to_release_page(page, 0);
1501out:
1502 return;
1503}
1504EXPORT_SYMBOL(block_invalidatepage);
1505
1506
1507
1508
1509
1510
1511
1512void create_empty_buffers(struct page *page,
1513 unsigned long blocksize, unsigned long b_state)
1514{
1515 struct buffer_head *bh, *head, *tail;
1516
1517 head = alloc_page_buffers(page, blocksize, true);
1518 bh = head;
1519 do {
1520 bh->b_state |= b_state;
1521 tail = bh;
1522 bh = bh->b_this_page;
1523 } while (bh);
1524 tail->b_this_page = head;
1525
1526 spin_lock(&page->mapping->private_lock);
1527 if (PageUptodate(page) || PageDirty(page)) {
1528 bh = head;
1529 do {
1530 if (PageDirty(page))
1531 set_buffer_dirty(bh);
1532 if (PageUptodate(page))
1533 set_buffer_uptodate(bh);
1534 bh = bh->b_this_page;
1535 } while (bh != head);
1536 }
1537 attach_page_buffers(page, head);
1538 spin_unlock(&page->mapping->private_lock);
1539}
1540EXPORT_SYMBOL(create_empty_buffers);
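
/**
 * clean_bdev_aliases: clean a range of buffers in block device
 * @bdev: Block device to clean buffers in
 * @block: Start of a range of blocks to clean
 * @len: Number of blocks to clean
 *
 * A filesystem that has just allocated these blocks for data does not want
 * stale buffer-cache aliases in the block device mapping to be written back
 * over them later.  Walk the affected pages and clear the dirty and req bits
 * on any buffer mapping a block in the range, waiting for I/O that is
 * already in flight.
 */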
1562void clean_bdev_aliases(struct block_device *bdev, sector_t block, sector_t len)
1563{
1564 struct inode *bd_inode = bdev->bd_inode;
1565 struct address_space *bd_mapping = bd_inode->i_mapping;
1566 struct pagevec pvec;
1567 pgoff_t index = block >> (PAGE_SHIFT - bd_inode->i_blkbits);
1568 pgoff_t end;
1569 int i, count;
1570 struct buffer_head *bh;
1571 struct buffer_head *head;
1572
1573 end = (block + len - 1) >> (PAGE_SHIFT - bd_inode->i_blkbits);
1574 pagevec_init(&pvec);
1575 while (pagevec_lookup_range(&pvec, bd_mapping, &index, end)) {
1576 count = pagevec_count(&pvec);
1577 for (i = 0; i < count; i++) {
1578 struct page *page = pvec.pages[i];
1579
1580 if (!page_has_buffers(page))
1581 continue;
1582
1583
1584
1585
1586
1587 lock_page(page);
1588
1589 if (!page_has_buffers(page))
1590 goto unlock_page;
1591 head = page_buffers(page);
1592 bh = head;
1593 do {
1594 if (!buffer_mapped(bh) || (bh->b_blocknr < block))
1595 goto next;
1596 if (bh->b_blocknr >= block + len)
1597 break;
1598 clear_buffer_dirty(bh);
1599 wait_on_buffer(bh);
1600 clear_buffer_req(bh);
1601next:
1602 bh = bh->b_this_page;
1603 } while (bh != head);
1604unlock_page:
1605 unlock_page(page);
1606 }
1607 pagevec_release(&pvec);
1608 cond_resched();
1609
1610 if (index > end || !index)
1611 break;
1612 }
1613}
1614EXPORT_SYMBOL(clean_bdev_aliases);
1615
1616
1617
1618
1619
1620
1621
1622
1623
1624static inline int block_size_bits(unsigned int blocksize)
1625{
1626 return ilog2(blocksize);
1627}
1628
1629static struct buffer_head *create_page_buffers(struct page *page, struct inode *inode, unsigned int b_state)
1630{
1631 BUG_ON(!PageLocked(page));
1632
1633 if (!page_has_buffers(page))
1634 create_empty_buffers(page, 1 << READ_ONCE(inode->i_blkbits),
1635 b_state);
1636 return page_buffers(page);
1637}
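
/*
 * NOTE! All mapped/uptodate combinations are valid:
 *
 *	Mapped	Uptodate	Meaning
 *
 *	No	No		"unknown" - must do get_block()
 *	No	Yes		"hole" - zero-filled
 *	Yes	No		"allocated" - allocated on disk, not read in
 *	Yes	Yes		"valid" - allocated and up-to-date in memory.
 *
 * "Dirty" is valid only with the last case (mapped+uptodate).
 */

/*
 * While block_write_full_page is writing back the dirty buffers under
 * the page lock, whoever dirtied the buffers may decide to clean them
 * again at any moment.  We handle that by only looking at the buffer's
 * state inside lock_buffer().  get_block() is called only for dirty
 * buffers that are not yet mapped (or are delayed-allocation buffers);
 * buffers wholly beyond i_size are simply cleaned.
 */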
1668int __block_write_full_page(struct inode *inode, struct page *page,
1669 get_block_t *get_block, struct writeback_control *wbc,
1670 bh_end_io_t *handler)
1671{
1672 int err;
1673 sector_t block;
1674 sector_t last_block;
1675 struct buffer_head *bh, *head;
1676 unsigned int blocksize, bbits;
1677 int nr_underway = 0;
1678 int write_flags = wbc_to_write_flags(wbc);
1679
1680 head = create_page_buffers(page, inode,
1681 (1 << BH_Dirty)|(1 << BH_Uptodate));
1682
1683
1684
1685
1686
1687
1688
1689
1690
1691
1692
1693 bh = head;
1694 blocksize = bh->b_size;
1695 bbits = block_size_bits(blocksize);
1696
1697 block = (sector_t)page->index << (PAGE_SHIFT - bbits);
1698 last_block = (i_size_read(inode) - 1) >> bbits;
1699
1700
1701
1702
1703
1704 do {
1705 if (block > last_block) {
1706
1707
1708
1709
1710
1711
1712
1713
1714 clear_buffer_dirty(bh);
1715 set_buffer_uptodate(bh);
1716 } else if ((!buffer_mapped(bh) || buffer_delay(bh)) &&
1717 buffer_dirty(bh)) {
1718 WARN_ON(bh->b_size != blocksize);
1719 err = get_block(inode, block, bh, 1);
1720 if (err)
1721 goto recover;
1722 clear_buffer_delay(bh);
1723 if (buffer_new(bh)) {
1724
1725 clear_buffer_new(bh);
1726 clean_bdev_bh_alias(bh);
1727 }
1728 }
1729 bh = bh->b_this_page;
1730 block++;
1731 } while (bh != head);
1732
1733 do {
1734 if (!buffer_mapped(bh))
1735 continue;
1736
1737
1738
1739
1740
1741
1742
1743 if (wbc->sync_mode != WB_SYNC_NONE) {
1744 lock_buffer(bh);
1745 } else if (!trylock_buffer(bh)) {
1746 redirty_page_for_writepage(wbc, page);
1747 continue;
1748 }
1749 if (test_clear_buffer_dirty(bh)) {
1750 mark_buffer_async_write_endio(bh, handler);
1751 } else {
1752 unlock_buffer(bh);
1753 }
1754 } while ((bh = bh->b_this_page) != head);
1755
1756
1757
1758
1759
1760 BUG_ON(PageWriteback(page));
1761 set_page_writeback(page);
1762
1763 do {
1764 struct buffer_head *next = bh->b_this_page;
1765 if (buffer_async_write(bh)) {
1766 submit_bh_wbc(REQ_OP_WRITE, write_flags, bh,
1767 inode->i_write_hint, wbc);
1768 nr_underway++;
1769 }
1770 bh = next;
1771 } while (bh != head);
1772 unlock_page(page);
1773
1774 err = 0;
1775done:
1776 if (nr_underway == 0) {
1777
1778
1779
1780
1781
1782 end_page_writeback(page);
1783
1784
1785
1786
1787
1788 }
1789 return err;
1790
1791recover:
1792
1793
1794
1795
1796
1797
1798 bh = head;
1799
1800 do {
1801 if (buffer_mapped(bh) && buffer_dirty(bh) &&
1802 !buffer_delay(bh)) {
1803 lock_buffer(bh);
1804 mark_buffer_async_write_endio(bh, handler);
1805 } else {
1806
1807
1808
1809
1810 clear_buffer_dirty(bh);
1811 }
1812 } while ((bh = bh->b_this_page) != head);
1813 SetPageError(page);
1814 BUG_ON(PageWriteback(page));
1815 mapping_set_error(page->mapping, err);
1816 set_page_writeback(page);
1817 do {
1818 struct buffer_head *next = bh->b_this_page;
1819 if (buffer_async_write(bh)) {
1820 clear_buffer_dirty(bh);
1821 submit_bh_wbc(REQ_OP_WRITE, write_flags, bh,
1822 inode->i_write_hint, wbc);
1823 nr_underway++;
1824 }
1825 bh = next;
1826 } while (bh != head);
1827 unlock_page(page);
1828 goto done;
1829}
1830EXPORT_SYMBOL(__block_write_full_page);
1831
1832
1833
1834
1835
1836
1837void page_zero_new_buffers(struct page *page, unsigned from, unsigned to)
1838{
1839 unsigned int block_start, block_end;
1840 struct buffer_head *head, *bh;
1841
1842 BUG_ON(!PageLocked(page));
1843 if (!page_has_buffers(page))
1844 return;
1845
1846 bh = head = page_buffers(page);
1847 block_start = 0;
1848 do {
1849 block_end = block_start + bh->b_size;
1850
1851 if (buffer_new(bh)) {
1852 if (block_end > from && block_start < to) {
1853 if (!PageUptodate(page)) {
1854 unsigned start, size;
1855
1856 start = max(from, block_start);
1857 size = min(to, block_end) - start;
1858
1859 zero_user(page, start, size);
1860 set_buffer_uptodate(bh);
1861 }
1862
1863 clear_buffer_new(bh);
1864 mark_buffer_dirty(bh);
1865 }
1866 }
1867
1868 block_start = block_end;
1869 bh = bh->b_this_page;
1870 } while (bh != head);
1871}
1872EXPORT_SYMBOL(page_zero_new_buffers);
1873
1874static void
1875iomap_to_bh(struct inode *inode, sector_t block, struct buffer_head *bh,
1876 struct iomap *iomap)
1877{
1878 loff_t offset = block << inode->i_blkbits;
1879
1880 bh->b_bdev = iomap->bdev;
1881
1882
1883
1884
1885
1886
1887
1888 BUG_ON(offset >= iomap->offset + iomap->length);
1889
1890 switch (iomap->type) {
1891 case IOMAP_HOLE:
1892
1893
1894
1895
1896
1897 if (!buffer_uptodate(bh) ||
1898 (offset >= i_size_read(inode)))
1899 set_buffer_new(bh);
1900 break;
1901 case IOMAP_DELALLOC:
1902 if (!buffer_uptodate(bh) ||
1903 (offset >= i_size_read(inode)))
1904 set_buffer_new(bh);
1905 set_buffer_uptodate(bh);
1906 set_buffer_mapped(bh);
1907 set_buffer_delay(bh);
1908 break;
1909 case IOMAP_UNWRITTEN:
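		/*
		 * For unwritten regions, we always need to ensure that regions
		 * in the block we are not writing to are zeroed. Mark the
		 * buffer as new to ensure this.
		 */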
1915 set_buffer_new(bh);
1916 set_buffer_unwritten(bh);
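		/* fall through */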
1918 case IOMAP_MAPPED:
1919 if ((iomap->flags & IOMAP_F_NEW) ||
1920 offset >= i_size_read(inode))
1921 set_buffer_new(bh);
1922 bh->b_blocknr = (iomap->addr + offset - iomap->offset) >>
1923 inode->i_blkbits;
1924 set_buffer_mapped(bh);
1925 break;
1926 }
1927}
1928
1929int __block_write_begin_int(struct page *page, loff_t pos, unsigned len,
1930 get_block_t *get_block, struct iomap *iomap)
1931{
1932 unsigned from = pos & (PAGE_SIZE - 1);
1933 unsigned to = from + len;
1934 struct inode *inode = page->mapping->host;
1935 unsigned block_start, block_end;
1936 sector_t block;
1937 int err = 0;
1938 unsigned blocksize, bbits;
1939 struct buffer_head *bh, *head, *wait[2], **wait_bh=wait;
1940
1941 BUG_ON(!PageLocked(page));
1942 BUG_ON(from > PAGE_SIZE);
1943 BUG_ON(to > PAGE_SIZE);
1944 BUG_ON(from > to);
1945
1946 head = create_page_buffers(page, inode, 0);
1947 blocksize = head->b_size;
1948 bbits = block_size_bits(blocksize);
1949
1950 block = (sector_t)page->index << (PAGE_SHIFT - bbits);
1951
1952 for(bh = head, block_start = 0; bh != head || !block_start;
1953 block++, block_start=block_end, bh = bh->b_this_page) {
1954 block_end = block_start + blocksize;
1955 if (block_end <= from || block_start >= to) {
1956 if (PageUptodate(page)) {
1957 if (!buffer_uptodate(bh))
1958 set_buffer_uptodate(bh);
1959 }
1960 continue;
1961 }
1962 if (buffer_new(bh))
1963 clear_buffer_new(bh);
1964 if (!buffer_mapped(bh)) {
1965 WARN_ON(bh->b_size != blocksize);
1966 if (get_block) {
1967 err = get_block(inode, block, bh, 1);
1968 if (err)
1969 break;
1970 } else {
1971 iomap_to_bh(inode, block, bh, iomap);
1972 }
1973
1974 if (buffer_new(bh)) {
1975 clean_bdev_bh_alias(bh);
1976 if (PageUptodate(page)) {
1977 clear_buffer_new(bh);
1978 set_buffer_uptodate(bh);
1979 mark_buffer_dirty(bh);
1980 continue;
1981 }
1982 if (block_end > to || block_start < from)
1983 zero_user_segments(page,
1984 to, block_end,
1985 block_start, from);
1986 continue;
1987 }
1988 }
1989 if (PageUptodate(page)) {
1990 if (!buffer_uptodate(bh))
1991 set_buffer_uptodate(bh);
1992 continue;
1993 }
1994 if (!buffer_uptodate(bh) && !buffer_delay(bh) &&
1995 !buffer_unwritten(bh) &&
1996 (block_start < from || block_end > to)) {
1997 ll_rw_block(REQ_OP_READ, 0, 1, &bh);
1998 *wait_bh++=bh;
1999 }
2000 }
2001
2002
2003
2004 while(wait_bh > wait) {
2005 wait_on_buffer(*--wait_bh);
2006 if (!buffer_uptodate(*wait_bh))
2007 err = -EIO;
2008 }
2009 if (unlikely(err))
2010 page_zero_new_buffers(page, from, to);
2011 return err;
2012}
2013
2014int __block_write_begin(struct page *page, loff_t pos, unsigned len,
2015 get_block_t *get_block)
2016{
2017 return __block_write_begin_int(page, pos, len, get_block, NULL);
2018}
2019EXPORT_SYMBOL(__block_write_begin);
2020
2021static int __block_commit_write(struct inode *inode, struct page *page,
2022 unsigned from, unsigned to)
2023{
2024 unsigned block_start, block_end;
2025 int partial = 0;
2026 unsigned blocksize;
2027 struct buffer_head *bh, *head;
2028
2029 bh = head = page_buffers(page);
2030 blocksize = bh->b_size;
2031
2032 block_start = 0;
2033 do {
2034 block_end = block_start + blocksize;
2035 if (block_end <= from || block_start >= to) {
2036 if (!buffer_uptodate(bh))
2037 partial = 1;
2038 } else {
2039 set_buffer_uptodate(bh);
2040 mark_buffer_dirty(bh);
2041 }
2042 clear_buffer_new(bh);
2043
2044 block_start = block_end;
2045 bh = bh->b_this_page;
2046 } while (bh != head);
2047
2048
2049
2050
2051
2052
2053
2054 if (!partial)
2055 SetPageUptodate(page);
2056 return 0;
2057}
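
/*
 * block_write_begin takes care of the basic task of block allocation and
 * bringing partial write blocks uptodate first.
 *
 * The filesystem needs to handle block truncation upon failure.
 */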
2065int block_write_begin(struct address_space *mapping, loff_t pos, unsigned len,
2066 unsigned flags, struct page **pagep, get_block_t *get_block)
2067{
2068 pgoff_t index = pos >> PAGE_SHIFT;
2069 struct page *page;
2070 int status;
2071
2072 page = grab_cache_page_write_begin(mapping, index, flags);
2073 if (!page)
2074 return -ENOMEM;
2075
2076 status = __block_write_begin(page, pos, len, get_block);
2077 if (unlikely(status)) {
2078 unlock_page(page);
2079 put_page(page);
2080 page = NULL;
2081 }
2082
2083 *pagep = page;
2084 return status;
2085}
2086EXPORT_SYMBOL(block_write_begin);
2087
2088int __generic_write_end(struct inode *inode, loff_t pos, unsigned copied,
2089 struct page *page)
2090{
2091 loff_t old_size = inode->i_size;
2092 bool i_size_changed = false;
2093
2094
2095
2096
2097
2098
2099
2100
2101 if (pos + copied > inode->i_size) {
2102 i_size_write(inode, pos + copied);
2103 i_size_changed = true;
2104 }
2105
2106 unlock_page(page);
2107 put_page(page);
2108
2109 if (old_size < pos)
2110 pagecache_isize_extended(inode, old_size, pos);
2111
2112
2113
2114
2115
2116
2117 if (i_size_changed)
2118 mark_inode_dirty(inode);
2119 return copied;
2120}
2121
2122int block_write_end(struct file *file, struct address_space *mapping,
2123 loff_t pos, unsigned len, unsigned copied,
2124 struct page *page, void *fsdata)
2125{
2126 struct inode *inode = mapping->host;
2127 unsigned start;
2128
2129 start = pos & (PAGE_SIZE - 1);
2130
2131 if (unlikely(copied < len)) {
2132
2133
2134
2135
2136
2137
2138
2139
2140
2141
2142
2143
2144 if (!PageUptodate(page))
2145 copied = 0;
2146
2147 page_zero_new_buffers(page, start+copied, start+len);
2148 }
2149 flush_dcache_page(page);
2150
2151
2152 __block_commit_write(inode, page, start, start+copied);
2153
2154 return copied;
2155}
2156EXPORT_SYMBOL(block_write_end);
2157
2158int generic_write_end(struct file *file, struct address_space *mapping,
2159 loff_t pos, unsigned len, unsigned copied,
2160 struct page *page, void *fsdata)
2161{
2162 copied = block_write_end(file, mapping, pos, len, copied, page, fsdata);
2163 return __generic_write_end(mapping->host, pos, copied, page);
2164}
2165EXPORT_SYMBOL(generic_write_end);
2166
2167
2168
2169
2170
2171
2172
2173
2174int block_is_partially_uptodate(struct page *page, unsigned long from,
2175 unsigned long count)
2176{
2177 unsigned block_start, block_end, blocksize;
2178 unsigned to;
2179 struct buffer_head *bh, *head;
2180 int ret = 1;
2181
2182 if (!page_has_buffers(page))
2183 return 0;
2184
2185 head = page_buffers(page);
2186 blocksize = head->b_size;
2187 to = min_t(unsigned, PAGE_SIZE - from, count);
2188 to = from + to;
2189 if (from < blocksize && to > PAGE_SIZE - blocksize)
2190 return 0;
2191
2192 bh = head;
2193 block_start = 0;
2194 do {
2195 block_end = block_start + blocksize;
2196 if (block_end > from && block_start < to) {
2197 if (!buffer_uptodate(bh)) {
2198 ret = 0;
2199 break;
2200 }
2201 if (block_end >= to)
2202 break;
2203 }
2204 block_start = block_end;
2205 bh = bh->b_this_page;
2206 } while (bh != head);
2207
2208 return ret;
2209}
2210EXPORT_SYMBOL(block_is_partially_uptodate);
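
/*
 * Generic "read page" function for block devices that have the normal
 * get_block functionality. This is most of the block device filesystems.
 * Reads the page asynchronously --- the unlock_buffer() and
 * set/clear_buffer_uptodate() functions propagate buffer state into the
 * page struct once IO has completed.
 */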
2219int block_read_full_page(struct page *page, get_block_t *get_block)
2220{
2221 struct inode *inode = page->mapping->host;
2222 sector_t iblock, lblock;
2223 struct buffer_head *bh, *head, *arr[MAX_BUF_PER_PAGE];
2224 unsigned int blocksize, bbits;
2225 int nr, i;
2226 int fully_mapped = 1;
2227
2228 head = create_page_buffers(page, inode, 0);
2229 blocksize = head->b_size;
2230 bbits = block_size_bits(blocksize);
2231
2232 iblock = (sector_t)page->index << (PAGE_SHIFT - bbits);
2233 lblock = (i_size_read(inode)+blocksize-1) >> bbits;
2234 bh = head;
2235 nr = 0;
2236 i = 0;
2237
2238 do {
2239 if (buffer_uptodate(bh))
2240 continue;
2241
2242 if (!buffer_mapped(bh)) {
2243 int err = 0;
2244
2245 fully_mapped = 0;
2246 if (iblock < lblock) {
2247 WARN_ON(bh->b_size != blocksize);
2248 err = get_block(inode, iblock, bh, 0);
2249 if (err)
2250 SetPageError(page);
2251 }
2252 if (!buffer_mapped(bh)) {
2253 zero_user(page, i * blocksize, blocksize);
2254 if (!err)
2255 set_buffer_uptodate(bh);
2256 continue;
2257 }
2258
2259
2260
2261
2262 if (buffer_uptodate(bh))
2263 continue;
2264 }
2265 arr[nr++] = bh;
2266 } while (i++, iblock++, (bh = bh->b_this_page) != head);
2267
2268 if (fully_mapped)
2269 SetPageMappedToDisk(page);
2270
2271 if (!nr) {
2272
2273
2274
2275
2276 if (!PageError(page))
2277 SetPageUptodate(page);
2278 unlock_page(page);
2279 return 0;
2280 }
2281
2282
2283 for (i = 0; i < nr; i++) {
2284 bh = arr[i];
2285 lock_buffer(bh);
2286 mark_buffer_async_read(bh);
2287 }
2288
2289
2290
2291
2292
2293
2294 for (i = 0; i < nr; i++) {
2295 bh = arr[i];
2296 if (buffer_uptodate(bh))
2297 end_buffer_async_read(bh, 1);
2298 else
2299 submit_bh(REQ_OP_READ, 0, bh);
2300 }
2301 return 0;
2302}
2303EXPORT_SYMBOL(block_read_full_page);
2304
2305
2306
2307
2308
2309int generic_cont_expand_simple(struct inode *inode, loff_t size)
2310{
2311 struct address_space *mapping = inode->i_mapping;
2312 struct page *page;
2313 void *fsdata;
2314 int err;
2315
2316 err = inode_newsize_ok(inode, size);
2317 if (err)
2318 goto out;
2319
2320 err = pagecache_write_begin(NULL, mapping, size, 0,
2321 AOP_FLAG_CONT_EXPAND, &page, &fsdata);
2322 if (err)
2323 goto out;
2324
2325 err = pagecache_write_end(NULL, mapping, size, 0, 0, page, fsdata);
2326 BUG_ON(err > 0);
2327
2328out:
2329 return err;
2330}
2331EXPORT_SYMBOL(generic_cont_expand_simple);
2332
2333static int cont_expand_zero(struct file *file, struct address_space *mapping,
2334 loff_t pos, loff_t *bytes)
2335{
2336 struct inode *inode = mapping->host;
2337 unsigned int blocksize = i_blocksize(inode);
2338 struct page *page;
2339 void *fsdata;
2340 pgoff_t index, curidx;
2341 loff_t curpos;
2342 unsigned zerofrom, offset, len;
2343 int err = 0;
2344
2345 index = pos >> PAGE_SHIFT;
2346 offset = pos & ~PAGE_MASK;
2347
2348 while (index > (curidx = (curpos = *bytes)>>PAGE_SHIFT)) {
2349 zerofrom = curpos & ~PAGE_MASK;
2350 if (zerofrom & (blocksize-1)) {
2351 *bytes |= (blocksize-1);
2352 (*bytes)++;
2353 }
2354 len = PAGE_SIZE - zerofrom;
2355
2356 err = pagecache_write_begin(file, mapping, curpos, len, 0,
2357 &page, &fsdata);
2358 if (err)
2359 goto out;
2360 zero_user(page, zerofrom, len);
2361 err = pagecache_write_end(file, mapping, curpos, len, len,
2362 page, fsdata);
2363 if (err < 0)
2364 goto out;
2365 BUG_ON(err != len);
2366 err = 0;
2367
2368 balance_dirty_pages_ratelimited(mapping);
2369
2370 if (fatal_signal_pending(current)) {
2371 err = -EINTR;
2372 goto out;
2373 }
2374 }
2375
2376
2377 if (index == curidx) {
2378 zerofrom = curpos & ~PAGE_MASK;
2379
2380 if (offset <= zerofrom) {
2381 goto out;
2382 }
2383 if (zerofrom & (blocksize-1)) {
2384 *bytes |= (blocksize-1);
2385 (*bytes)++;
2386 }
2387 len = offset - zerofrom;
2388
2389 err = pagecache_write_begin(file, mapping, curpos, len, 0,
2390 &page, &fsdata);
2391 if (err)
2392 goto out;
2393 zero_user(page, zerofrom, len);
2394 err = pagecache_write_end(file, mapping, curpos, len, len,
2395 page, fsdata);
2396 if (err < 0)
2397 goto out;
2398 BUG_ON(err != len);
2399 err = 0;
2400 }
2401out:
2402 return err;
2403}
2404
2405
2406
2407
2408
2409int cont_write_begin(struct file *file, struct address_space *mapping,
2410 loff_t pos, unsigned len, unsigned flags,
2411 struct page **pagep, void **fsdata,
2412 get_block_t *get_block, loff_t *bytes)
2413{
2414 struct inode *inode = mapping->host;
2415 unsigned int blocksize = i_blocksize(inode);
2416 unsigned int zerofrom;
2417 int err;
2418
2419 err = cont_expand_zero(file, mapping, pos, bytes);
2420 if (err)
2421 return err;
2422
2423 zerofrom = *bytes & ~PAGE_MASK;
2424 if (pos+len > *bytes && zerofrom & (blocksize-1)) {
2425 *bytes |= (blocksize-1);
2426 (*bytes)++;
2427 }
2428
2429 return block_write_begin(mapping, pos, len, flags, pagep, get_block);
2430}
2431EXPORT_SYMBOL(cont_write_begin);
2432
2433int block_commit_write(struct page *page, unsigned from, unsigned to)
2434{
2435 struct inode *inode = page->mapping->host;
2436 __block_commit_write(inode,page,from,to);
2437 return 0;
2438}
2439EXPORT_SYMBOL(block_commit_write);
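
/*
 * block_page_mkwrite() is not allowed to change the file size as it gets
 * called from a page fault handler when a page is first dirtied.  Hence we
 * must be careful to check for EOF conditions here.  We set the page up
 * correctly for a written page, which means we get ENOSPC checking when
 * writing into holes and correct delalloc and unwritten extent mapping on
 * filesystems that support these features.
 *
 * We are not allowed to take the i_mutex here, so we have to play games to
 * protect against truncate races as the page could now be beyond EOF.
 * Because truncate writes the inode size before removing pages, once we have
 * the page lock we can determine safely if the page is beyond EOF.  If it is
 * not beyond EOF, then the page is guaranteed safe against truncation until
 * we unlock the page.
 *
 * Direct callers of this function should protect against filesystem freezing
 * using sb_start_pagefault() - sb_end_pagefault() functions.
 */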
2459int block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
2460 get_block_t get_block)
2461{
2462 struct page *page = vmf->page;
2463 struct inode *inode = file_inode(vma->vm_file);
2464 unsigned long end;
2465 loff_t size;
2466 int ret;
2467
2468 lock_page(page);
2469 size = i_size_read(inode);
2470 if ((page->mapping != inode->i_mapping) ||
2471 (page_offset(page) > size)) {
2472
2473 ret = -EFAULT;
2474 goto out_unlock;
2475 }
2476
2477
2478 if (((page->index + 1) << PAGE_SHIFT) > size)
2479 end = size & ~PAGE_MASK;
2480 else
2481 end = PAGE_SIZE;
2482
2483 ret = __block_write_begin(page, 0, end, get_block);
2484 if (!ret)
2485 ret = block_commit_write(page, 0, end);
2486
2487 if (unlikely(ret < 0))
2488 goto out_unlock;
2489 set_page_dirty(page);
2490 wait_for_stable_page(page);
2491 return 0;
2492out_unlock:
2493 unlock_page(page);
2494 return ret;
2495}
2496EXPORT_SYMBOL(block_page_mkwrite);
2497
2498
2499
2500
2501
2502
2503static void end_buffer_read_nobh(struct buffer_head *bh, int uptodate)
2504{
2505 __end_buffer_read_notouch(bh, uptodate);
2506}
2507
2508
2509
2510
2511
2512
2513static void attach_nobh_buffers(struct page *page, struct buffer_head *head)
2514{
2515 struct buffer_head *bh;
2516
2517 BUG_ON(!PageLocked(page));
2518
2519 spin_lock(&page->mapping->private_lock);
2520 bh = head;
2521 do {
2522 if (PageDirty(page))
2523 set_buffer_dirty(bh);
2524 if (!bh->b_this_page)
2525 bh->b_this_page = head;
2526 bh = bh->b_this_page;
2527 } while (bh != head);
2528 attach_page_buffers(page, head);
2529 spin_unlock(&page->mapping->private_lock);
2530}
2531
2532
2533
2534
2535
2536
2537int nobh_write_begin(struct address_space *mapping,
2538 loff_t pos, unsigned len, unsigned flags,
2539 struct page **pagep, void **fsdata,
2540 get_block_t *get_block)
2541{
2542 struct inode *inode = mapping->host;
2543 const unsigned blkbits = inode->i_blkbits;
2544 const unsigned blocksize = 1 << blkbits;
2545 struct buffer_head *head, *bh;
2546 struct page *page;
2547 pgoff_t index;
2548 unsigned from, to;
2549 unsigned block_in_page;
2550 unsigned block_start, block_end;
2551 sector_t block_in_file;
2552 int nr_reads = 0;
2553 int ret = 0;
2554 int is_mapped_to_disk = 1;
2555
2556 index = pos >> PAGE_SHIFT;
2557 from = pos & (PAGE_SIZE - 1);
2558 to = from + len;
2559
2560 page = grab_cache_page_write_begin(mapping, index, flags);
2561 if (!page)
2562 return -ENOMEM;
2563 *pagep = page;
2564 *fsdata = NULL;
2565
2566 if (page_has_buffers(page)) {
2567 ret = __block_write_begin(page, pos, len, get_block);
2568 if (unlikely(ret))
2569 goto out_release;
2570 return ret;
2571 }
2572
2573 if (PageMappedToDisk(page))
2574 return 0;
2575
2576
2577
2578
2579
2580
2581
2582
2583
2584
2585 head = alloc_page_buffers(page, blocksize, false);
2586 if (!head) {
2587 ret = -ENOMEM;
2588 goto out_release;
2589 }
2590
2591 block_in_file = (sector_t)page->index << (PAGE_SHIFT - blkbits);
2592
2593
2594
2595
2596
2597
2598 for (block_start = 0, block_in_page = 0, bh = head;
2599 block_start < PAGE_SIZE;
2600 block_in_page++, block_start += blocksize, bh = bh->b_this_page) {
2601 int create;
2602
2603 block_end = block_start + blocksize;
2604 bh->b_state = 0;
2605 create = 1;
2606 if (block_start >= to)
2607 create = 0;
2608 ret = get_block(inode, block_in_file + block_in_page,
2609 bh, create);
2610 if (ret)
2611 goto failed;
2612 if (!buffer_mapped(bh))
2613 is_mapped_to_disk = 0;
2614 if (buffer_new(bh))
2615 clean_bdev_bh_alias(bh);
2616 if (PageUptodate(page)) {
2617 set_buffer_uptodate(bh);
2618 continue;
2619 }
2620 if (buffer_new(bh) || !buffer_mapped(bh)) {
2621 zero_user_segments(page, block_start, from,
2622 to, block_end);
2623 continue;
2624 }
2625 if (buffer_uptodate(bh))
2626 continue;
2627 if (block_start < from || block_end > to) {
2628 lock_buffer(bh);
2629 bh->b_end_io = end_buffer_read_nobh;
2630 submit_bh(REQ_OP_READ, 0, bh);
2631 nr_reads++;
2632 }
2633 }
2634
2635 if (nr_reads) {
2636
2637
2638
2639
2640
2641 for (bh = head; bh; bh = bh->b_this_page) {
2642 wait_on_buffer(bh);
2643 if (!buffer_uptodate(bh))
2644 ret = -EIO;
2645 }
2646 if (ret)
2647 goto failed;
2648 }
2649
2650 if (is_mapped_to_disk)
2651 SetPageMappedToDisk(page);
2652
2653 *fsdata = head;
2654
2655 return 0;
2656
2657failed:
2658 BUG_ON(!ret);
2659
2660
2661
2662
2663
2664
2665
2666 attach_nobh_buffers(page, head);
2667 page_zero_new_buffers(page, from, to);
2668
2669out_release:
2670 unlock_page(page);
2671 put_page(page);
2672 *pagep = NULL;
2673
2674 return ret;
2675}
2676EXPORT_SYMBOL(nobh_write_begin);
2677
2678int nobh_write_end(struct file *file, struct address_space *mapping,
2679 loff_t pos, unsigned len, unsigned copied,
2680 struct page *page, void *fsdata)
2681{
2682 struct inode *inode = page->mapping->host;
2683 struct buffer_head *head = fsdata;
2684 struct buffer_head *bh;
2685 BUG_ON(fsdata != NULL && page_has_buffers(page));
2686
2687 if (unlikely(copied < len) && head)
2688 attach_nobh_buffers(page, head);
2689 if (page_has_buffers(page))
2690 return generic_write_end(file, mapping, pos, len,
2691 copied, page, fsdata);
2692
2693 SetPageUptodate(page);
2694 set_page_dirty(page);
2695 if (pos+copied > inode->i_size) {
2696 i_size_write(inode, pos+copied);
2697 mark_inode_dirty(inode);
2698 }
2699
2700 unlock_page(page);
2701 put_page(page);
2702
2703 while (head) {
2704 bh = head;
2705 head = head->b_this_page;
2706 free_buffer_head(bh);
2707 }
2708
2709 return copied;
2710}
2711EXPORT_SYMBOL(nobh_write_end);
2712
2713
2714
2715
2716
2717
2718int nobh_writepage(struct page *page, get_block_t *get_block,
2719 struct writeback_control *wbc)
2720{
2721 struct inode * const inode = page->mapping->host;
2722 loff_t i_size = i_size_read(inode);
2723 const pgoff_t end_index = i_size >> PAGE_SHIFT;
2724 unsigned offset;
2725 int ret;
2726
2727
2728 if (page->index < end_index)
2729 goto out;
2730
2731
2732 offset = i_size & (PAGE_SIZE-1);
2733 if (page->index >= end_index+1 || !offset) {
2734
2735
2736
2737
2738
2739#if 0
2740
2741 if (page->mapping->a_ops->invalidatepage)
2742 page->mapping->a_ops->invalidatepage(page, offset);
2743#endif
2744 unlock_page(page);
2745 return 0;
2746 }
2747
2748
2749
2750
2751
2752
2753
2754
2755 zero_user_segment(page, offset, PAGE_SIZE);
2756out:
2757 ret = mpage_writepage(page, get_block, wbc);
2758 if (ret == -EAGAIN)
2759 ret = __block_write_full_page(inode, page, get_block, wbc,
2760 end_buffer_async_write);
2761 return ret;
2762}
2763EXPORT_SYMBOL(nobh_writepage);
2764
2765int nobh_truncate_page(struct address_space *mapping,
2766 loff_t from, get_block_t *get_block)
2767{
2768 pgoff_t index = from >> PAGE_SHIFT;
2769 unsigned offset = from & (PAGE_SIZE-1);
2770 unsigned blocksize;
2771 sector_t iblock;
2772 unsigned length, pos;
2773 struct inode *inode = mapping->host;
2774 struct page *page;
2775 struct buffer_head map_bh;
2776 int err;
2777
2778 blocksize = i_blocksize(inode);
2779 length = offset & (blocksize - 1);
2780
2781
2782 if (!length)
2783 return 0;
2784
2785 length = blocksize - length;
2786 iblock = (sector_t)index << (PAGE_SHIFT - inode->i_blkbits);
2787
2788 page = grab_cache_page(mapping, index);
2789 err = -ENOMEM;
2790 if (!page)
2791 goto out;
2792
2793 if (page_has_buffers(page)) {
2794has_buffers:
2795 unlock_page(page);
2796 put_page(page);
2797 return block_truncate_page(mapping, from, get_block);
2798 }
2799
2800
2801 pos = blocksize;
2802 while (offset >= pos) {
2803 iblock++;
2804 pos += blocksize;
2805 }
2806
2807 map_bh.b_size = blocksize;
2808 map_bh.b_state = 0;
2809 err = get_block(inode, iblock, &map_bh, 0);
2810 if (err)
2811 goto unlock;
2812
2813 if (!buffer_mapped(&map_bh))
2814 goto unlock;
2815
2816
2817 if (!PageUptodate(page)) {
2818 err = mapping->a_ops->readpage(NULL, page);
2819 if (err) {
2820 put_page(page);
2821 goto out;
2822 }
2823 lock_page(page);
2824 if (!PageUptodate(page)) {
2825 err = -EIO;
2826 goto unlock;
2827 }
2828 if (page_has_buffers(page))
2829 goto has_buffers;
2830 }
2831 zero_user(page, offset, length);
2832 set_page_dirty(page);
2833 err = 0;
2834
2835unlock:
2836 unlock_page(page);
2837 put_page(page);
2838out:
2839 return err;
2840}
2841EXPORT_SYMBOL(nobh_truncate_page);
2842
int block_truncate_page(struct address_space *mapping,
			loff_t from, get_block_t *get_block)
{
	pgoff_t index = from >> PAGE_SHIFT;
	unsigned offset = from & (PAGE_SIZE-1);
	unsigned blocksize;
	sector_t iblock;
	unsigned length, pos;
	struct inode *inode = mapping->host;
	struct page *page;
	struct buffer_head *bh;
	int err;

	blocksize = i_blocksize(inode);
	length = offset & (blocksize - 1);

	/* Block boundary? Nothing to do */
	if (!length)
		return 0;

	length = blocksize - length;
	iblock = (sector_t)index << (PAGE_SHIFT - inode->i_blkbits);

	page = grab_cache_page(mapping, index);
	err = -ENOMEM;
	if (!page)
		goto out;

	if (!page_has_buffers(page))
		create_empty_buffers(page, blocksize, 0);

	/* Find the buffer that contains "offset" */
	bh = page_buffers(page);
	pos = blocksize;
	while (offset >= pos) {
		bh = bh->b_this_page;
		iblock++;
		pos += blocksize;
	}

	err = 0;
	if (!buffer_mapped(bh)) {
		WARN_ON(bh->b_size != blocksize);
		err = get_block(inode, iblock, bh, 0);
		if (err)
			goto unlock;
		/* unmapped? It's a hole - nothing to do */
		if (!buffer_mapped(bh))
			goto unlock;
	}

	/* Ok, it's mapped. Make sure it's up-to-date */
	if (PageUptodate(page))
		set_buffer_uptodate(bh);

	if (!buffer_uptodate(bh) && !buffer_delay(bh) && !buffer_unwritten(bh)) {
		err = -EIO;
		ll_rw_block(REQ_OP_READ, 0, 1, &bh);
		wait_on_buffer(bh);
		/* Uhhuh. Read error. Complain and punt. */
		if (!buffer_uptodate(bh))
			goto unlock;
	}

	zero_user(page, offset, length);
	mark_buffer_dirty(bh);
	err = 0;

unlock:
	unlock_page(page);
	put_page(page);
out:
	return err;
}
EXPORT_SYMBOL(block_truncate_page);

/*
 * The generic ->writepage function for buffer-backed address_spaces
 */
int block_write_full_page(struct page *page, get_block_t *get_block,
			struct writeback_control *wbc)
{
	struct inode * const inode = page->mapping->host;
	loff_t i_size = i_size_read(inode);
	const pgoff_t end_index = i_size >> PAGE_SHIFT;
	unsigned offset;

	/* Is the page fully inside i_size? */
	if (page->index < end_index)
		return __block_write_full_page(inode, page, get_block, wbc,
					       end_buffer_async_write);

	/* Is the page fully outside i_size? (truncate in progress) */
	offset = i_size & (PAGE_SIZE-1);
	if (page->index >= end_index+1 || !offset) {
		/*
		 * The page may have dirty, unmapped buffers.  For example,
		 * they may have been added in ext3_writepage().  Make them
		 * freeable here, so the page does not leak.
		 */
		do_invalidatepage(page, 0, PAGE_SIZE);
		unlock_page(page);
		return 0;
	}

	/*
	 * The page straddles i_size.  It must be zeroed out on each and every
	 * writepage invocation because it may be mmapped.  "A file is mapped
	 * in multiples of the page size.  For a file that is not a multiple of
	 * the page size, the remaining memory is zeroed when mapped, and
	 * writes to that region are not written out to the file."
	 */
	zero_user_segment(page, offset, PAGE_SIZE);
	return __block_write_full_page(inode, page, get_block, wbc,
				       end_buffer_async_write);
}
EXPORT_SYMBOL(block_write_full_page);

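/*
 * Generic ->bmap() helper for get_block-based filesystems: map logical
 * @block with @get_block (create == 0) and return the resulting disk block
 * number, or 0 if the block is a hole or the mapping fails.
 */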
sector_t generic_block_bmap(struct address_space *mapping, sector_t block,
			    get_block_t *get_block)
{
	struct inode *inode = mapping->host;
	struct buffer_head tmp = {
		.b_size = i_blocksize(inode),
	};

	get_block(inode, block, &tmp, 0);
	return tmp.b_blocknr;
}
EXPORT_SYMBOL(generic_block_bmap);

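/*
 * Completion handler for bios built by submit_bh_wbc(): propagate BIO_QUIET
 * to the buffer, hand the I/O status to the buffer's b_end_io and drop the
 * bio reference.
 */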
static void end_bio_bh_io_sync(struct bio *bio)
{
	struct buffer_head *bh = bio->bi_private;

	if (unlikely(bio_flagged(bio, BIO_QUIET)))
		set_bit(BH_Quiet, &bh->b_state);

	bh->b_end_io(bh, !bio->bi_status);
	bio_put(bio);
}

/*
 * This allows us to do IO even on the odd last sectors
 * of a device, even if the block size is some multiple
 * of the physical sector size.
 *
 * We'll just truncate the bio to the size of the device,
 * and clear the end of the buffer head manually.
 *
 * Truly out-of-range accesses will turn into actual IO
 * errors, these only handle the "we need to be able to
 * do IO at the final sector" case.
 */
void guard_bio_eod(int op, struct bio *bio)
{
	sector_t maxsector;
	struct bio_vec *bvec = bio_last_bvec_all(bio);
	unsigned truncated_bytes;
	struct hd_struct *part;

	rcu_read_lock();
	part = __disk_get_part(bio->bi_disk, bio->bi_partno);
	if (part)
		maxsector = part_nr_sects_read(part);
	else
		maxsector = get_capacity(bio->bi_disk);
	rcu_read_unlock();

	if (!maxsector)
		return;

	/*
	 * If the *whole* IO is past the end of the device,
	 * let it through, and the IO layer will turn it into
	 * an EIO.
	 */
	if (unlikely(bio->bi_iter.bi_sector >= maxsector))
		return;

	maxsector -= bio->bi_iter.bi_sector;
	if (likely((bio->bi_iter.bi_size >> 9) <= maxsector))
		return;

	/* Uhhuh. We've got a bio that straddles the device size! */
	truncated_bytes = bio->bi_iter.bi_size - (maxsector << 9);

	/* Truncate the bio.. */
	bio->bi_iter.bi_size -= truncated_bytes;
	bvec->bv_len -= truncated_bytes;

	/* ..and clear the end of the buffer for reads */
	if (op == REQ_OP_READ) {
		zero_user(bvec->bv_page, bvec->bv_offset + bvec->bv_len,
				truncated_bytes);
	}
}

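/*
 * Build and submit a single-segment bio for a locked, mapped buffer head.
 * The buffer's b_end_io is invoked from end_bio_bh_io_sync() once the bio
 * completes; @wbc, when non-NULL, is used for cgroup writeback accounting.
 */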
static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh,
			 enum rw_hint write_hint, struct writeback_control *wbc)
{
	struct bio *bio;

	BUG_ON(!buffer_locked(bh));
	BUG_ON(!buffer_mapped(bh));
	BUG_ON(!bh->b_end_io);
	BUG_ON(buffer_delay(bh));
	BUG_ON(buffer_unwritten(bh));

	/*
	 * Only clear out a write error when a write request
	 * completes.
	 */
	if (test_set_buffer_req(bh) && (op == REQ_OP_WRITE))
		clear_buffer_write_io_error(bh);

	/*
	 * from here on down, it's all bio -- do the initial mapping,
	 * submit_bio -> generic_make_request may further map this bio around
	 */
	bio = bio_alloc(GFP_NOIO, 1);

	bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9);
	bio_set_dev(bio, bh->b_bdev);
	bio->bi_write_hint = write_hint;

	bio_add_page(bio, bh->b_page, bh->b_size, bh_offset(bh));
	BUG_ON(bio->bi_iter.bi_size != bh->b_size);

	bio->bi_end_io = end_bio_bh_io_sync;
	bio->bi_private = bh;

	/* Take care of bh's that straddle the end of the device */
	guard_bio_eod(op, bio);

	if (buffer_meta(bh))
		op_flags |= REQ_META;
	if (buffer_prio(bh))
		op_flags |= REQ_PRIO;
	bio_set_op_attrs(bio, op, op_flags);

	if (wbc) {
		wbc_init_bio(wbc, bio);
		wbc_account_io(wbc, bh->b_page, bh->b_size);
	}

	submit_bio(bio);
	return 0;
}

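/*
 * Submit a buffer for I/O with no write hint and no writeback control
 * attached.
 */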
int submit_bh(int op, int op_flags, struct buffer_head *bh)
{
	return submit_bh_wbc(op, op_flags, bh, 0, NULL);
}
EXPORT_SYMBOL(submit_bh);

/**
 * ll_rw_block: low-level access to block devices (DEPRECATED)
 * @op: whether to %READ or %WRITE
 * @op_flags: req_flag_bits
 * @nr: number of &struct buffer_heads in the array
 * @bhs: array of pointers to &struct buffer_head
 *
 * ll_rw_block() takes an array of pointers to &struct buffer_heads, and
 * requests an I/O operation on them, either a %REQ_OP_READ or a %REQ_OP_WRITE.
 * @op_flags contains flags modifying the detailed I/O behavior, most notably
 * %REQ_RAHEAD.
 *
 * This function drops any buffer that it cannot get a lock on (with the
 * BH_Lock state bit), any buffer that appears to be clean when doing a write
 * request, and any buffer that appears to be up-to-date when doing read
 * request.  Further it marks as clean buffers that are processed for
 * writing (the buffer cache won't assume that they are actually clean
 * until the buffer gets unlocked).
 *
 * ll_rw_block sets b_end_io to simple completion handler that marks
 * the buffer up-to-date (if appropriate), unlocks the buffer and wakes
 * any waiters.
 *
 * All of the buffers must be for the same device, and must also be a
 * multiple of the current approved size for the device.
 */
void ll_rw_block(int op, int op_flags, int nr, struct buffer_head *bhs[])
{
	int i;

	for (i = 0; i < nr; i++) {
		struct buffer_head *bh = bhs[i];

		if (!trylock_buffer(bh))
			continue;
		if (op == WRITE) {
			if (test_clear_buffer_dirty(bh)) {
				bh->b_end_io = end_buffer_write_sync;
				get_bh(bh);
				submit_bh(op, op_flags, bh);
				continue;
			}
		} else {
			if (!buffer_uptodate(bh)) {
				bh->b_end_io = end_buffer_read_sync;
				get_bh(bh);
				submit_bh(op, op_flags, bh);
				continue;
			}
		}
		unlock_buffer(bh);
	}
}
EXPORT_SYMBOL(ll_rw_block);

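/*
 * Start writeback of a dirty buffer without waiting for it to complete:
 * clear the dirty bit and submit an asynchronous write, or simply unlock
 * the buffer if it turned out to be clean.
 */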
void write_dirty_buffer(struct buffer_head *bh, int op_flags)
{
	lock_buffer(bh);
	if (!test_clear_buffer_dirty(bh)) {
		unlock_buffer(bh);
		return;
	}
	bh->b_end_io = end_buffer_write_sync;
	get_bh(bh);
	submit_bh(REQ_OP_WRITE, op_flags, bh);
}
EXPORT_SYMBOL(write_dirty_buffer);

/*
 * For a data-integrity writeout, we need to wait upon any in-progress I/O
 * and then start new I/O and then wait upon it.  The caller must have a ref
 * on the buffer_head.
 */
int __sync_dirty_buffer(struct buffer_head *bh, int op_flags)
{
	int ret = 0;

	WARN_ON(atomic_read(&bh->b_count) < 1);
	lock_buffer(bh);
	if (test_clear_buffer_dirty(bh)) {
		get_bh(bh);
		bh->b_end_io = end_buffer_write_sync;
		ret = submit_bh(REQ_OP_WRITE, op_flags, bh);
		wait_on_buffer(bh);
		if (!ret && !buffer_uptodate(bh))
			ret = -EIO;
	} else {
		unlock_buffer(bh);
	}
	return ret;
}
EXPORT_SYMBOL(__sync_dirty_buffer);

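/*
 * Synchronously write out a dirty buffer with REQ_SYNC; see
 * __sync_dirty_buffer().
 */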
int sync_dirty_buffer(struct buffer_head *bh)
{
	return __sync_dirty_buffer(bh, REQ_SYNC);
}
EXPORT_SYMBOL(sync_dirty_buffer);

/*
 * try_to_free_buffers() checks if all the buffers on this particular page
 * are unused, and releases them if so.
 *
 * Exclusion against try_to_free_buffers may be obtained by either
 * locking the page or by holding its mapping's private_lock.
 *
 * If the page is dirty but all the buffers are clean then we need to
 * be sure to mark the page clean as well.  This is because the page
 * may be against a block device, and a later reattachment of buffers
 * to a dirty page will set *all* buffers dirty.  So a subsequent
 * attachment of buffers onto a clean page will dirty the page.
 *
 * The page also may be a "volatile" page which has had buffers which are
 * dirty, but which were written out at the last minute.  In this case
 * the page is marked clean, but all the buffers are marked dirty.  We
 * could just treat the page as dirty; but that would be wrong for
 * data=journal mode: in that mode we'd have dirty buffers on a clean
 * page, and the filesystem would then get confused.
 *
 * try_to_free_buffers() is non-blocking.
 */
static inline int buffer_busy(struct buffer_head *bh)
{
	return atomic_read(&bh->b_count) |
		(bh->b_state & ((1 << BH_Dirty) | (1 << BH_Lock)));
}

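/*
 * Try to detach all buffers from @page.  Fails (returns 0) if any buffer is
 * busy (referenced, dirty or locked); on success the ring of buffer heads is
 * handed back through @buffers_to_free so the caller can free them after
 * dropping any locks it holds.
 */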
static int
drop_buffers(struct page *page, struct buffer_head **buffers_to_free)
{
	struct buffer_head *head = page_buffers(page);
	struct buffer_head *bh;

	bh = head;
	do {
		if (buffer_busy(bh))
			goto failed;
		bh = bh->b_this_page;
	} while (bh != head);

	do {
		struct buffer_head *next = bh->b_this_page;

		if (bh->b_assoc_map)
			__remove_assoc_queue(bh);
		bh = next;
	} while (bh != head);
	*buffers_to_free = head;
	__clear_page_buffers(page);
	return 1;
failed:
	return 0;
}

int try_to_free_buffers(struct page *page)
{
	struct address_space * const mapping = page->mapping;
	struct buffer_head *buffers_to_free = NULL;
	int ret = 0;

	BUG_ON(!PageLocked(page));
	if (PageWriteback(page))
		return 0;

	if (mapping == NULL) {
		ret = drop_buffers(page, &buffers_to_free);
		goto out;
	}

	spin_lock(&mapping->private_lock);
	ret = drop_buffers(page, &buffers_to_free);

	/*
	 * If the filesystem writes its buffers by hand (eg ext3)
	 * then we can have clean buffers against a dirty page.  We
	 * clean the page here; otherwise the VM reclaim logic
	 * just bangs on a clean page and leaves it clean.
	 *
	 * private_lock must be held over this entire operation in order
	 * to synchronise against __set_page_dirty_buffers and prevent the
	 * dirty bit from being lost.
	 */
	if (ret)
		cancel_dirty_page(page);
	spin_unlock(&mapping->private_lock);
out:
	if (buffers_to_free) {
		struct buffer_head *bh = buffers_to_free;

		do {
			struct buffer_head *next = bh->b_this_page;
			free_buffer_head(bh);
			bh = next;
		} while (bh != buffers_to_free);
	}
	return ret;
}
EXPORT_SYMBOL(try_to_free_buffers);

/*
 * There are no bdflush tunables left.  But distributions are
 * still running obsolete flush daemons, so we terminate them here.
 *
 * Use of bdflush() is deprecated and will be removed in a future kernel.
 * The `flush-X' kernel threads fully replace bdflush daemons and this call.
 */
SYSCALL_DEFINE2(bdflush, int, func, long, data)
{
	static int msg_count;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	if (msg_count < 5) {
		msg_count++;
		printk(KERN_INFO
			"warning: process `%s' used the obsolete bdflush"
			" system call\n", current->comm);
		printk(KERN_INFO "Fix your initscripts?\n");
	}

	if (func == 1)
		do_exit(0);
	return 0;
}

/*
 * Buffer-head allocation
 */
static struct kmem_cache *bh_cachep __read_mostly;

/*
 * Once the number of bh's in the machine exceeds this level, we start
 * stripping them in writeback.
 */
static unsigned long max_buffer_heads;

int buffer_heads_over_limit;

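/*
 * Per-CPU bookkeeping of how many buffer_heads are allocated.  The per-CPU
 * counts are only summed up (rate-limited, in recalc_bh_state()) to decide
 * whether buffer_heads_over_limit should be set for the reclaim/writeback
 * code.
 */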
struct bh_accounting {
	int nr;
	int ratelimit;
};

static DEFINE_PER_CPU(struct bh_accounting, bh_accounting) = {0, 0};

static void recalc_bh_state(void)
{
	int i;
	int tot = 0;

	if (__this_cpu_inc_return(bh_accounting.ratelimit) - 1 < 4096)
		return;
	__this_cpu_write(bh_accounting.ratelimit, 0);
	for_each_online_cpu(i)
		tot += per_cpu(bh_accounting, i).nr;
	buffer_heads_over_limit = (tot > max_buffer_heads);
}

struct buffer_head *alloc_buffer_head(gfp_t gfp_flags)
{
	struct buffer_head *ret = kmem_cache_zalloc(bh_cachep, gfp_flags);
	if (ret) {
		INIT_LIST_HEAD(&ret->b_assoc_buffers);
		preempt_disable();
		__this_cpu_inc(bh_accounting.nr);
		recalc_bh_state();
		preempt_enable();
	}
	return ret;
}
EXPORT_SYMBOL(alloc_buffer_head);

void free_buffer_head(struct buffer_head *bh)
{
	BUG_ON(!list_empty(&bh->b_assoc_buffers));
	kmem_cache_free(bh_cachep, bh);
	preempt_disable();
	__this_cpu_dec(bh_accounting.nr);
	recalc_bh_state();
	preempt_enable();
}
EXPORT_SYMBOL(free_buffer_head);

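/*
 * CPU-hotplug callback: when a CPU dies, release the buffer_heads parked in
 * its per-CPU LRU and fold its bh_accounting count into the current CPU's.
 */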
static int buffer_exit_cpu_dead(unsigned int cpu)
{
	int i;
	struct bh_lru *b = &per_cpu(bh_lrus, cpu);

	for (i = 0; i < BH_LRU_SIZE; i++) {
		brelse(b->bhs[i]);
		b->bhs[i] = NULL;
	}
	this_cpu_add(bh_accounting.nr, per_cpu(bh_accounting, cpu).nr);
	per_cpu(bh_accounting, cpu).nr = 0;
	return 0;
}

/**
 * bh_uptodate_or_lock - Test whether the buffer is uptodate
 * @bh: struct buffer_head
 *
 * Return true if the buffer is up-to-date and false,
 * with the buffer locked, if not.
 */
int bh_uptodate_or_lock(struct buffer_head *bh)
{
	if (!buffer_uptodate(bh)) {
		lock_buffer(bh);
		if (!buffer_uptodate(bh))
			return 0;
		unlock_buffer(bh);
	}
	return 1;
}
EXPORT_SYMBOL(bh_uptodate_or_lock);

/**
 * bh_submit_read - Submit a locked buffer for reading
 * @bh: struct buffer_head
 *
 * Returns zero on success and -EIO on error.
 */
int bh_submit_read(struct buffer_head *bh)
{
	BUG_ON(!buffer_locked(bh));

	if (buffer_uptodate(bh)) {
		unlock_buffer(bh);
		return 0;
	}

	get_bh(bh);
	bh->b_end_io = end_buffer_read_sync;
	submit_bh(REQ_OP_READ, 0, bh);
	wait_on_buffer(bh);
	if (buffer_uptodate(bh))
		return 0;
	return -EIO;
}
EXPORT_SYMBOL(bh_submit_read);

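/*
 * Called once at boot: create the buffer_head slab cache, size
 * max_buffer_heads from the amount of low memory available for buffers, and
 * register the CPU hotplug callback that drains per-CPU state when a CPU
 * goes away.
 */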
void __init buffer_init(void)
{
	unsigned long nrpages;
	int ret;

	bh_cachep = kmem_cache_create("buffer_head",
			sizeof(struct buffer_head), 0,
				(SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|
				SLAB_MEM_SPREAD),
				NULL);

	/*
	 * Limit the bh occupancy to 10% of ZONE_NORMAL
	 */
	nrpages = (nr_free_buffer_pages() * 10) / 100;
	max_buffer_heads = nrpages * (PAGE_SIZE / sizeof(struct buffer_head));
	ret = cpuhp_setup_state_nocalls(CPUHP_FS_BUFF_DEAD, "fs/buffer:dead",
					NULL, buffer_exit_cpu_dead);
	WARN_ON(ret < 0);
}