/*
 *  linux/fs/buffer.c
 *
 *  Copyright (C) 1991, 1992, 2002  Linus Torvalds
 */

#include <linux/kernel.h>
#include <linux/sched/signal.h>
#include <linux/syscalls.h>
#include <linux/fs.h>
#include <linux/iomap.h>
#include <linux/mm.h>
#include <linux/percpu.h>
#include <linux/slab.h>
#include <linux/capability.h>
#include <linux/blkdev.h>
#include <linux/file.h>
#include <linux/quotaops.h>
#include <linux/highmem.h>
#include <linux/export.h>
#include <linux/backing-dev.h>
#include <linux/writeback.h>
#include <linux/hash.h>
#include <linux/suspend.h>
#include <linux/buffer_head.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/bio.h>
#include <linux/notifier.h>
#include <linux/cpu.h>
#include <linux/bitops.h>
#include <linux/mpage.h>
#include <linux/bit_spinlock.h>
#include <linux/pagevec.h>
#include <trace/events/block.h>

static int fsync_buffers_list(spinlock_t *lock, struct list_head *list);
static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh,
			 enum rw_hint hint, struct writeback_control *wbc);

#define BH_ENTRY(list) list_entry((list), struct buffer_head, b_assoc_buffers)

inline void touch_buffer(struct buffer_head *bh)
{
	trace_block_touch_buffer(bh);
	mark_page_accessed(bh->b_page);
}
EXPORT_SYMBOL(touch_buffer);

void __lock_buffer(struct buffer_head *bh)
{
	wait_on_bit_lock_io(&bh->b_state, BH_Lock, TASK_UNINTERRUPTIBLE);
}
EXPORT_SYMBOL(__lock_buffer);

void unlock_buffer(struct buffer_head *bh)
{
	clear_bit_unlock(BH_Lock, &bh->b_state);
	smp_mb__after_atomic();
	wake_up_bit(&bh->b_state, BH_Lock);
}
EXPORT_SYMBOL(unlock_buffer);

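/*
 * Returns whether the page has any dirty or writeback buffers.  If all
 * the buffers are unlocked and clean then the PageDirty information is
 * stale.  Locked buffers are counted as under writeback, since they are
 * assumed to be locked for I/O.
 */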
void buffer_check_dirty_writeback(struct page *page,
				  bool *dirty, bool *writeback)
{
	struct buffer_head *head, *bh;
	*dirty = false;
	*writeback = false;

	BUG_ON(!PageLocked(page));

	if (!page_has_buffers(page))
		return;

	if (PageWriteback(page))
		*writeback = true;

	head = page_buffers(page);
	bh = head;
	do {
		if (buffer_locked(bh))
			*writeback = true;

		if (buffer_dirty(bh))
			*dirty = true;

		bh = bh->b_this_page;
	} while (bh != head);
}
EXPORT_SYMBOL(buffer_check_dirty_writeback);

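/*
 * Block until a buffer comes unlocked.  This doesn't stop it from
 * becoming locked again - you have to lock it yourself if you want
 * to preserve its state.
 */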
void __wait_on_buffer(struct buffer_head *bh)
{
	wait_on_bit_io(&bh->b_state, BH_Lock, TASK_UNINTERRUPTIBLE);
}
EXPORT_SYMBOL(__wait_on_buffer);

static void
__clear_page_buffers(struct page *page)
{
	ClearPagePrivate(page);
	set_page_private(page, 0);
	put_page(page);
}

static void buffer_io_error(struct buffer_head *bh, char *msg)
{
	if (!test_bit(BH_Quiet, &bh->b_state))
		printk_ratelimited(KERN_ERR
			"Buffer I/O error on dev %pg, logical block %llu%s\n",
			bh->b_bdev, (unsigned long long)bh->b_blocknr, msg);
}

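/*
 * End-of-IO handler helper function which does not touch the bh after
 * unlocking it.
 * Note: unlock_buffer() sort-of does touch the bh after unlocking it, but
 * a race there is benign: unlock_buffer() only uses the bh's address for
 * hashing after unlocking the buffer, so it doesn't actually touch the bh
 * itself.
 */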
static void __end_buffer_read_notouch(struct buffer_head *bh, int uptodate)
{
	if (uptodate) {
		set_buffer_uptodate(bh);
	} else {
		/* This happens, due to failed read-ahead attempts. */
		clear_buffer_uptodate(bh);
	}
	unlock_buffer(bh);
}

/*
 * Default synchronous end-of-IO handler.  Just mark the buffer up-to-date
 * and unlock it.
 */
void end_buffer_read_sync(struct buffer_head *bh, int uptodate)
{
	__end_buffer_read_notouch(bh, uptodate);
	put_bh(bh);
}
EXPORT_SYMBOL(end_buffer_read_sync);

void end_buffer_write_sync(struct buffer_head *bh, int uptodate)
{
	if (uptodate) {
		set_buffer_uptodate(bh);
	} else {
		buffer_io_error(bh, ", lost sync page write");
		mark_buffer_write_io_error(bh);
		clear_buffer_uptodate(bh);
	}
	unlock_buffer(bh);
	put_bh(bh);
}
EXPORT_SYMBOL(end_buffer_write_sync);

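/*
 * Various filesystems appear to want __find_get_block to be non-blocking.
 * But it's the page lock which protects the buffers.  To get around this,
 * we get exclusion from try_to_free_buffers with the blockdev mapping's
 * private_lock.
 *
 * Hack idea: for the blockdev mapping, private_lock contention
 * may be quite high.  This code could TryLock the page, and if that
 * succeeds, there is no need to take private_lock.
 */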
static struct buffer_head *
__find_get_block_slow(struct block_device *bdev, sector_t block)
{
	struct inode *bd_inode = bdev->bd_inode;
	struct address_space *bd_mapping = bd_inode->i_mapping;
	struct buffer_head *ret = NULL;
	pgoff_t index;
	struct buffer_head *bh;
	struct buffer_head *head;
	struct page *page;
	int all_mapped = 1;

	index = block >> (PAGE_SHIFT - bd_inode->i_blkbits);
	page = find_get_page_flags(bd_mapping, index, FGP_ACCESSED);
	if (!page)
		goto out;

	spin_lock(&bd_mapping->private_lock);
	if (!page_has_buffers(page))
		goto out_unlock;
	head = page_buffers(page);
	bh = head;
	do {
		if (!buffer_mapped(bh))
			all_mapped = 0;
		else if (bh->b_blocknr == block) {
			ret = bh;
			get_bh(bh);
			goto out_unlock;
		}
		bh = bh->b_this_page;
	} while (bh != head);

	/*
	 * We might be here because some of the buffers on this page are
	 * not mapped.  This is due to various races between file I/O on
	 * the block device and getblk.  It gets dealt with elsewhere;
	 * don't complain if we had some unmapped buffers.
	 */
	if (all_mapped) {
		printk("__find_get_block_slow() failed. "
			"block=%llu, b_blocknr=%llu\n",
			(unsigned long long)block,
			(unsigned long long)bh->b_blocknr);
		printk("b_state=0x%08lx, b_size=%zu\n",
			bh->b_state, bh->b_size);
		printk("device %pg blocksize: %d\n", bdev,
			1 << bd_inode->i_blkbits);
	}
out_unlock:
	spin_unlock(&bd_mapping->private_lock);
	put_page(page);
out:
	return ret;
}

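/*
 * I/O completion handler for block_read_full_page() - pages
 * which come unlocked at the end of I/O.
 */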
static void end_buffer_async_read(struct buffer_head *bh, int uptodate)
{
	unsigned long flags;
	struct buffer_head *first;
	struct buffer_head *tmp;
	struct page *page;
	int page_uptodate = 1;

	BUG_ON(!buffer_async_read(bh));

	page = bh->b_page;
	if (uptodate) {
		set_buffer_uptodate(bh);
	} else {
		clear_buffer_uptodate(bh);
		buffer_io_error(bh, ", async page read");
		SetPageError(page);
	}

	/*
	 * Be _very_ careful from here on. Bad things can happen if
	 * two buffer heads end IO at almost the same time and both
	 * decide that the page is now completely done.
	 */
	first = page_buffers(page);
	local_irq_save(flags);
	bit_spin_lock(BH_Uptodate_Lock, &first->b_state);
	clear_buffer_async_read(bh);
	unlock_buffer(bh);
	tmp = bh;
	do {
		if (!buffer_uptodate(tmp))
			page_uptodate = 0;
		if (buffer_async_read(tmp)) {
			BUG_ON(!buffer_locked(tmp));
			goto still_busy;
		}
		tmp = tmp->b_this_page;
	} while (tmp != bh);
	bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
	local_irq_restore(flags);

	/*
	 * If none of the buffers had errors and they are all
	 * uptodate then we can set the page uptodate.
	 */
	if (page_uptodate && !PageError(page))
		SetPageUptodate(page);
	unlock_page(page);
	return;

still_busy:
	bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
	local_irq_restore(flags);
	return;
}

/*
 * Completion handler for block_write_full_page() - pages which are unlocked
 * during I/O, and which have PageWriteback cleared upon I/O completion.
 */
void end_buffer_async_write(struct buffer_head *bh, int uptodate)
{
	unsigned long flags;
	struct buffer_head *first;
	struct buffer_head *tmp;
	struct page *page;

	BUG_ON(!buffer_async_write(bh));

	page = bh->b_page;
	if (uptodate) {
		set_buffer_uptodate(bh);
	} else {
		buffer_io_error(bh, ", lost async page write");
		mark_buffer_write_io_error(bh);
		clear_buffer_uptodate(bh);
		SetPageError(page);
	}

	first = page_buffers(page);
	local_irq_save(flags);
	bit_spin_lock(BH_Uptodate_Lock, &first->b_state);

	clear_buffer_async_write(bh);
	unlock_buffer(bh);
	tmp = bh->b_this_page;
	while (tmp != bh) {
		if (buffer_async_write(tmp)) {
			BUG_ON(!buffer_locked(tmp));
			goto still_busy;
		}
		tmp = tmp->b_this_page;
	}
	bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
	local_irq_restore(flags);
	end_page_writeback(page);
	return;

still_busy:
	bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
	local_irq_restore(flags);
	return;
}
EXPORT_SYMBOL(end_buffer_async_write);

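/*
 * If a page's buffers are under async readin (end_buffer_async_read
 * completion) then there is a possibility that another thread of
 * control could lock one of the buffers after it returns.  But in
 * fact, the page which the buffer is in is locked while the buffers
 * are under read I/O, so nobody else can get at them.
 *
 * The page comes unlocked when it has no locked buffer_async buffers
 * left.  PageLocked prevents anyone from starting new async I/O
 * against any of the buffers, and PageWriteback is used to prevent
 * simultaneous writeout of the same page.
 */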
static void mark_buffer_async_read(struct buffer_head *bh)
{
	bh->b_end_io = end_buffer_async_read;
	set_buffer_async_read(bh);
}

static void mark_buffer_async_write_endio(struct buffer_head *bh,
					  bh_end_io_t *handler)
{
	bh->b_end_io = handler;
	set_buffer_async_write(bh);
}

void mark_buffer_async_write(struct buffer_head *bh)
{
	mark_buffer_async_write_endio(bh, end_buffer_async_write);
}
EXPORT_SYMBOL(mark_buffer_async_write);

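/*
 * The buffers on an inode's ->private_list are the buffers which were
 * dirtied against blocks backing the inode's data or metadata but which
 * live in the blockdev mapping rather than in the inode's own pagecache
 * (for example, indirect blocks on an ext2-style filesystem).  fsync()
 * must write those out too.  The list is protected by the private_lock
 * of the backing blockdev mapping, and b_assoc_map records which mapping
 * a buffer is currently associated with.
 */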
static void __remove_assoc_queue(struct buffer_head *bh)
{
	list_del_init(&bh->b_assoc_buffers);
	WARN_ON(!bh->b_assoc_map);
	bh->b_assoc_map = NULL;
}

int inode_has_buffers(struct inode *inode)
{
	return !list_empty(&inode->i_data.private_list);
}

/*
 * osync is designed to support O_SYNC io.  It waits synchronously for
 * all already-submitted IO to complete, but does not queue any new
 * writes to the disk.
 *
 * To do O_SYNC writes, just queue the buffer writes with ll_rw_block as
 * you dirty the buffers, and then use osync_inode_buffers to wait for
 * completion.  Any other dirty buffers which are not yet queued for
 * write will not be flushed to disk by the osync.
 */
static int osync_buffers_list(spinlock_t *lock, struct list_head *list)
{
	struct buffer_head *bh;
	struct list_head *p;
	int err = 0;

	spin_lock(lock);
repeat:
	list_for_each_prev(p, list) {
		bh = BH_ENTRY(p);
		if (buffer_locked(bh)) {
			get_bh(bh);
			spin_unlock(lock);
			wait_on_buffer(bh);
			if (!buffer_uptodate(bh))
				err = -EIO;
			brelse(bh);
			spin_lock(lock);
			goto repeat;
		}
	}
	spin_unlock(lock);
	return err;
}

void emergency_thaw_bdev(struct super_block *sb)
{
	while (sb->s_bdev && !thaw_bdev(sb->s_bdev, sb))
		printk(KERN_WARNING "Emergency Thaw on %pg\n", sb->s_bdev);
}

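/**
 * sync_mapping_buffers - write out & wait upon a mapping's "associated" buffers
 * @mapping: the mapping which wants those buffers written
 *
 * Starts I/O against the buffers at mapping->private_list, and waits upon
 * that I/O.
 *
 * Basically, this is a convenience function for fsync().
 * @mapping is a file or directory which needs those buffers to be written for
 * a successful fsync().
 */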
int sync_mapping_buffers(struct address_space *mapping)
{
	struct address_space *buffer_mapping = mapping->private_data;

	if (buffer_mapping == NULL || list_empty(&mapping->private_list))
		return 0;

	return fsync_buffers_list(&buffer_mapping->private_lock,
					&mapping->private_list);
}
EXPORT_SYMBOL(sync_mapping_buffers);

/*
 * Called when we've recently written block `bblock', and it is known that
 * `bblock' was for a buffer_boundary() buffer.  This means that the block at
 * `bblock + 1' is probably a dirty-ish block which we should probe for in
 * search of a metadata prefetch trigger... submit it if so.
 */
void write_boundary_block(struct block_device *bdev,
			sector_t bblock, unsigned blocksize)
{
	struct buffer_head *bh = __find_get_block(bdev, bblock + 1, blocksize);
	if (bh) {
		if (buffer_dirty(bh))
			ll_rw_block(REQ_OP_WRITE, 0, 1, &bh);
		put_bh(bh);
	}
}

void mark_buffer_dirty_inode(struct buffer_head *bh, struct inode *inode)
{
	struct address_space *mapping = inode->i_mapping;
	struct address_space *buffer_mapping = bh->b_page->mapping;

	mark_buffer_dirty(bh);
	if (!mapping->private_data) {
		mapping->private_data = buffer_mapping;
	} else {
		BUG_ON(mapping->private_data != buffer_mapping);
	}
	if (!bh->b_assoc_map) {
		spin_lock(&buffer_mapping->private_lock);
		list_move_tail(&bh->b_assoc_buffers,
				&mapping->private_list);
		bh->b_assoc_map = mapping;
		spin_unlock(&buffer_mapping->private_lock);
	}
}
EXPORT_SYMBOL(mark_buffer_dirty_inode);

/*
 * Mark the page dirty in the page cache's radix tree and account for it.
 * The caller must hold lock_page_memcg(); if @warn is set, warn when the
 * page is not uptodate.
 */
void __set_page_dirty(struct page *page, struct address_space *mapping,
			int warn)
{
	unsigned long flags;

	xa_lock_irqsave(&mapping->i_pages, flags);
	if (page->mapping) {	/* Race with truncate? */
		WARN_ON_ONCE(warn && !PageUptodate(page));
		account_page_dirtied(page, mapping);
		radix_tree_tag_set(&mapping->i_pages,
				page_index(page), PAGECACHE_TAG_DIRTY);
	}
	xa_unlock_irqrestore(&mapping->i_pages, flags);
}
EXPORT_SYMBOL_GPL(__set_page_dirty);

/*
 * Add a page to the dirty page list.
 *
 * If the page has buffers, the uptodate buffers are set dirty, to preserve
 * dirty-state coherency between the page and the buffers.  Buffers added to
 * the page after this call will not be dirtied.
 *
 * The buffers are dirtied before the page is dirtied.  There's a small race
 * window in which a writepage caller may see the page cleanness but not the
 * buffer dirtiness.  That's fine.  If this code were to set the page dirty
 * before the buffers, a concurrent writepage caller could clear the page
 * dirty bit, see a bunch of clean buffers and we'd end up with dirty buffers
 * attached to a clean page.
 */
int __set_page_dirty_buffers(struct page *page)
{
	int newly_dirty;
	struct address_space *mapping = page_mapping(page);

	if (unlikely(!mapping))
		return !TestSetPageDirty(page);

	spin_lock(&mapping->private_lock);
	if (page_has_buffers(page)) {
		struct buffer_head *head = page_buffers(page);
		struct buffer_head *bh = head;

		do {
			set_buffer_dirty(bh);
			bh = bh->b_this_page;
		} while (bh != head);
	}
	/*
	 * Lock out page->mem_cgroup migration to keep PageDirty
	 * synchronized with per-memcg dirty page counters.
	 */
	lock_page_memcg(page);
	newly_dirty = !TestSetPageDirty(page);
	spin_unlock(&mapping->private_lock);

	if (newly_dirty)
		__set_page_dirty(page, mapping, 1);

	unlock_page_memcg(page);

	if (newly_dirty)
		__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);

	return newly_dirty;
}
EXPORT_SYMBOL(__set_page_dirty_buffers);

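/*
 * Write out and wait upon a list of buffers.
 *
 * We have conflicting pressures: we want to make sure that all
 * initially dirty buffers get waited on, but that any subsequently
 * dirtied buffers don't.  After all, we don't want fsync to last
 * forever if somebody is actively writing to the file.
 *
 * Do this in two main stages: first we copy dirty buffers to a
 * temporary inode list, queueing the writes as we go.  Then we clean
 * up, waiting for those writes to complete.  During this second stage,
 * any subsequent dirtyings will be placed back onto the original inode
 * list and will be cleaned by a later fsync.
 */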
static int fsync_buffers_list(spinlock_t *lock, struct list_head *list)
{
	struct buffer_head *bh;
	struct list_head tmp;
	struct address_space *mapping;
	int err = 0, err2;
	struct blk_plug plug;

	INIT_LIST_HEAD(&tmp);
	blk_start_plug(&plug);

	spin_lock(lock);
	while (!list_empty(list)) {
		bh = BH_ENTRY(list->next);
		mapping = bh->b_assoc_map;
		__remove_assoc_queue(bh);
		/* Avoid race with mark_buffer_dirty_inode() which does
		 * a lockless check and we rely on seeing the dirty bit */
		smp_mb();
		if (buffer_dirty(bh) || buffer_locked(bh)) {
			list_add(&bh->b_assoc_buffers, &tmp);
			bh->b_assoc_map = mapping;
			if (buffer_dirty(bh)) {
				get_bh(bh);
				spin_unlock(lock);
				/*
				 * Ensure any pending I/O completes so that
				 * write_dirty_buffer() actually writes the
				 * current contents - it is a noop if I/O is
				 * still in flight on potentially older
				 * contents.
				 */
				write_dirty_buffer(bh, REQ_SYNC);

				/*
				 * Kick off IO for the previous mapping. Note
				 * that we will not run the very last mapping,
				 * wait_on_buffer() will do that for us
				 * through sync_buffer().
				 */
				brelse(bh);
				spin_lock(lock);
			}
		}
	}

	spin_unlock(lock);
	blk_finish_plug(&plug);
	spin_lock(lock);

	while (!list_empty(&tmp)) {
		bh = BH_ENTRY(tmp.prev);
		get_bh(bh);
		mapping = bh->b_assoc_map;
		__remove_assoc_queue(bh);
		/* Avoid race with mark_buffer_dirty_inode() which does
		 * a lockless check and we rely on seeing the dirty bit */
		smp_mb();
		if (buffer_dirty(bh)) {
			list_add(&bh->b_assoc_buffers,
				 &mapping->private_list);
			bh->b_assoc_map = mapping;
		}
		spin_unlock(lock);
		wait_on_buffer(bh);
		if (!buffer_uptodate(bh))
			err = -EIO;
		brelse(bh);
		spin_lock(lock);
	}

	spin_unlock(lock);
	err2 = osync_buffers_list(lock, list);
	if (err)
		return err;
	else
		return err2;
}

/*
 * Invalidate any and all dirty buffers on a given inode.  We are
 * probably unmounting the fs, but that doesn't mean we have already
 * done a sync().  Just drop the buffers from the inode list.
 *
 * NOTE: we take the inode's blockdev's mapping's private_lock.  Which
 * assumes that all the buffers are against the blockdev.
 */
void invalidate_inode_buffers(struct inode *inode)
{
	if (inode_has_buffers(inode)) {
		struct address_space *mapping = &inode->i_data;
		struct list_head *list = &mapping->private_list;
		struct address_space *buffer_mapping = mapping->private_data;

		spin_lock(&buffer_mapping->private_lock);
		while (!list_empty(list))
			__remove_assoc_queue(BH_ENTRY(list->next));
		spin_unlock(&buffer_mapping->private_lock);
	}
}
EXPORT_SYMBOL(invalidate_inode_buffers);

/*
 * Remove any clean buffers from the inode's buffer list.  This is called
 * when we're trying to free the inode itself.  Those buffers can pin it.
 *
 * Returns true if all buffers were removed.
 */
int remove_inode_buffers(struct inode *inode)
{
	int ret = 1;

	if (inode_has_buffers(inode)) {
		struct address_space *mapping = &inode->i_data;
		struct list_head *list = &mapping->private_list;
		struct address_space *buffer_mapping = mapping->private_data;

		spin_lock(&buffer_mapping->private_lock);
		while (!list_empty(list)) {
			struct buffer_head *bh = BH_ENTRY(list->next);
			if (buffer_dirty(bh)) {
				ret = 0;
				break;
			}
			__remove_assoc_queue(bh);
		}
		spin_unlock(&buffer_mapping->private_lock);
	}
	return ret;
}

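/*
 * Create the appropriate buffers when given a page for data area and
 * the size of each buffer.  Use the bh->b_this_page linked list to
 * follow the buffers created.  Return NULL if unable to create more
 * buffers.
 *
 * The retry flag is used to differentiate async IO (paging, swapping)
 * which may not fail from ordinary buffer allocations.
 */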
struct buffer_head *alloc_page_buffers(struct page *page, unsigned long size,
		bool retry)
{
	struct buffer_head *bh, *head;
	gfp_t gfp = GFP_NOFS;
	long offset;

	if (retry)
		gfp |= __GFP_NOFAIL;

	head = NULL;
	offset = PAGE_SIZE;
	while ((offset -= size) >= 0) {
		bh = alloc_buffer_head(gfp);
		if (!bh)
			goto no_grow;

		bh->b_this_page = head;
		bh->b_blocknr = -1;
		head = bh;

		bh->b_size = size;

		/* Link the buffer to its page */
		set_bh_page(bh, page, offset);
	}
	return head;
/*
 * In case anything failed, we just free everything we got.
 */
no_grow:
	if (head) {
		do {
			bh = head;
			head = head->b_this_page;
			free_buffer_head(bh);
		} while (head);
	}

	return NULL;
}
EXPORT_SYMBOL_GPL(alloc_page_buffers);

static inline void
link_dev_buffers(struct page *page, struct buffer_head *head)
{
	struct buffer_head *bh, *tail;

	bh = head;
	do {
		tail = bh;
		bh = bh->b_this_page;
	} while (bh);
	tail->b_this_page = head;
	attach_page_buffers(page, head);
}

static sector_t blkdev_max_block(struct block_device *bdev, unsigned int size)
{
	sector_t retval = ~((sector_t)0);
	loff_t sz = i_size_read(bdev->bd_inode);

	if (sz) {
		unsigned int sizebits = blksize_bits(size);
		retval = (sz >> sizebits);
	}
	return retval;
}

/*
 * Initialise the state of a blockdev page's buffers.
 */
static sector_t
init_page_buffers(struct page *page, struct block_device *bdev,
			sector_t block, int size)
{
	struct buffer_head *head = page_buffers(page);
	struct buffer_head *bh = head;
	int uptodate = PageUptodate(page);
	sector_t end_block = blkdev_max_block(I_BDEV(bdev->bd_inode), size);

	do {
		if (!buffer_mapped(bh)) {
			bh->b_end_io = NULL;
			bh->b_private = NULL;
			bh->b_bdev = bdev;
			bh->b_blocknr = block;
			if (uptodate)
				set_buffer_uptodate(bh);
			if (block < end_block)
				set_buffer_mapped(bh);
		}
		block++;
		bh = bh->b_this_page;
	} while (bh != head);

	/*
	 * Caller needs to validate requested block against end of device.
	 */
	return end_block;
}

/*
 * Create the page-cache page that contains the requested block.
 *
 * This is used purely for blockdev mappings.
 */
static int
grow_dev_page(struct block_device *bdev, sector_t block,
	      pgoff_t index, int size, int sizebits, gfp_t gfp)
{
	struct inode *inode = bdev->bd_inode;
	struct page *page;
	struct buffer_head *bh;
	sector_t end_block;
	int ret = 0;
	gfp_t gfp_mask;

	gfp_mask = mapping_gfp_constraint(inode->i_mapping, ~__GFP_FS) | gfp;

	/*
	 * XXX: __getblk_slow() can not really deal with failure and
	 * will endlessly loop on improvised global reclaim.  Prefer
	 * looping in the allocator rather than here, at least that
	 * code knows what it's doing.
	 */
	gfp_mask |= __GFP_NOFAIL;

	page = find_or_create_page(inode->i_mapping, index, gfp_mask);

	BUG_ON(!PageLocked(page));

	if (page_has_buffers(page)) {
		bh = page_buffers(page);
		if (bh->b_size == size) {
			end_block = init_page_buffers(page, bdev,
						(sector_t)index << sizebits,
						size);
			goto done;
		}
		if (!try_to_free_buffers(page))
			goto failed;
	}

	/*
	 * Allocate some buffers for this page
	 */
	bh = alloc_page_buffers(page, size, true);

	/*
	 * Link the page to the buffers and initialise them.  Take the
	 * lock to be atomic wrt __find_get_block(), which does not
	 * run under the page lock.
	 */
	spin_lock(&inode->i_mapping->private_lock);
	link_dev_buffers(page, bh);
	end_block = init_page_buffers(page, bdev, (sector_t)index << sizebits,
			size);
	spin_unlock(&inode->i_mapping->private_lock);
done:
	ret = (block < end_block) ? 1 : -ENXIO;
failed:
	unlock_page(page);
	put_page(page);
	return ret;
}

/*
 * Create buffers for the specified block device block's page.  If
 * that page was dirty, the buffers are set dirty also.
 */
static int
grow_buffers(struct block_device *bdev, sector_t block, int size, gfp_t gfp)
{
	pgoff_t index;
	int sizebits;

	sizebits = -1;
	do {
		sizebits++;
	} while ((size << sizebits) < PAGE_SIZE);

	index = block >> sizebits;

	/*
	 * Check for a block which wants to lie outside our maximum possible
	 * pagecache index.  (this comparison is done using sector_t types).
	 */
	if (unlikely(index != block >> sizebits)) {
		printk(KERN_ERR "%s: requested out-of-range block %llu for "
			"device %pg\n",
			__func__, (unsigned long long)block,
			bdev);
		return -EIO;
	}

	/* Create a page with the proper size buffers.. */
	return grow_dev_page(bdev, block, index, size, sizebits, gfp);
}

static struct buffer_head *
__getblk_slow(struct block_device *bdev, sector_t block,
	     unsigned size, gfp_t gfp)
{
	/* Size must be multiple of hard sectorsize */
	if (unlikely(size & (bdev_logical_block_size(bdev)-1) ||
			(size < 512 || size > PAGE_SIZE))) {
		printk(KERN_ERR "getblk(): invalid block size %d requested\n",
					size);
		printk(KERN_ERR "logical block size: %d\n",
					bdev_logical_block_size(bdev));

		dump_stack();
		return NULL;
	}

	for (;;) {
		struct buffer_head *bh;
		int ret;

		bh = __find_get_block(bdev, block, size);
		if (bh)
			return bh;

		ret = grow_buffers(bdev, block, size, gfp);
		if (ret < 0)
			return NULL;
	}
}

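/*
 * The relationship between dirty buffers and dirty pages:
 *
 * Whenever a page has any dirty buffers, the page's dirty bit is set,
 * and the page is tagged dirty in its address_space's radix tree.  At
 * all times, the dirtiness of the buffers represents the dirtiness of
 * subsections of the page; once a page has buffers, its dirty bit is
 * merely a hint.
 *
 * mark_buffer_dirty - mark a buffer_head as needing writeout
 * @bh: the buffer_head to mark dirty
 *
 * mark_buffer_dirty() will set the dirty bit against the buffer, then
 * set its backing page dirty, then tag the page as dirty in the page
 * cache and attach the address_space's inode to its superblock's dirty
 * inode list.
 */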
void mark_buffer_dirty(struct buffer_head *bh)
{
	WARN_ON_ONCE(!buffer_uptodate(bh));

	trace_block_dirty_buffer(bh);

	/*
	 * Very *carefully* optimize the it-is-already-dirty case.
	 *
	 * Don't let the final "is it dirty" escape to before we
	 * perhaps modified the buffer.
	 */
	if (buffer_dirty(bh)) {
		smp_mb();
		if (buffer_dirty(bh))
			return;
	}

	if (!test_set_buffer_dirty(bh)) {
		struct page *page = bh->b_page;
		struct address_space *mapping = NULL;

		lock_page_memcg(page);
		if (!TestSetPageDirty(page)) {
			mapping = page_mapping(page);
			if (mapping)
				__set_page_dirty(page, mapping, 0);
		}
		unlock_page_memcg(page);
		if (mapping)
			__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
	}
}
EXPORT_SYMBOL(mark_buffer_dirty);

void mark_buffer_write_io_error(struct buffer_head *bh)
{
	set_buffer_write_io_error(bh);
	/* FIXME: do we need to set this in both places? */
	if (bh->b_page && bh->b_page->mapping)
		mapping_set_error(bh->b_page->mapping, -EIO);
	if (bh->b_assoc_map)
		mapping_set_error(bh->b_assoc_map, -EIO);
}
EXPORT_SYMBOL(mark_buffer_write_io_error);

/*
 * Decrement a buffer_head's reference count.  If all buffers against a page
 * have zero reference count, are clean and unlocked, and if the page is clean
 * and unlocked then try_to_free_buffers() may strip the buffers from the page
 * in preparation for freeing it (sometimes, rarely, buffers are removed from
 * a page but it ends up not being freed, and buffers may later be reattached).
 */
void __brelse(struct buffer_head *buf)
{
	if (atomic_read(&buf->b_count)) {
		put_bh(buf);
		return;
	}
	WARN(1, KERN_ERR "VFS: brelse: Trying to free free buffer\n");
}
EXPORT_SYMBOL(__brelse);

/*
 * bforget() is like brelse(), except it discards any
 * potentially dirty data.
 */
void __bforget(struct buffer_head *bh)
{
	clear_buffer_dirty(bh);
	if (bh->b_assoc_map) {
		struct address_space *buffer_mapping = bh->b_page->mapping;

		spin_lock(&buffer_mapping->private_lock);
		list_del_init(&bh->b_assoc_buffers);
		bh->b_assoc_map = NULL;
		spin_unlock(&buffer_mapping->private_lock);
	}
	__brelse(bh);
}
EXPORT_SYMBOL(__bforget);

static struct buffer_head *__bread_slow(struct buffer_head *bh)
{
	lock_buffer(bh);
	if (buffer_uptodate(bh)) {
		unlock_buffer(bh);
		return bh;
	} else {
		get_bh(bh);
		bh->b_end_io = end_buffer_read_sync;
		submit_bh(REQ_OP_READ, 0, bh);
		wait_on_buffer(bh);
		if (buffer_uptodate(bh))
			return bh;
	}
	brelse(bh);
	return NULL;
}

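/*
 * Per-cpu buffer LRU implementation.  To reduce the cost of __find_get_block().
 * The bhs[] array is sorted - newest buffer is at bhs[0].  Buffers have their
 * refcount elevated by one when they're in an LRU.  A buffer's refcount is
 * decremented in the LRU when it gets evicted.  This means the LRU needs no
 * special locking beyond excluding invalidate_bh_lrus(), which is done with
 * a local interrupt disable (or preemption disable on !SMP).
 *
 * This is a transparent caching front-end to sb_bread(), sb_getblk() and
 * sb_find_get_block().
 */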
#define BH_LRU_SIZE	16

struct bh_lru {
	struct buffer_head *bhs[BH_LRU_SIZE];
};

static DEFINE_PER_CPU(struct bh_lru, bh_lrus) = {{ NULL }};

#ifdef CONFIG_SMP
#define bh_lru_lock()	local_irq_disable()
#define bh_lru_unlock()	local_irq_enable()
#else
#define bh_lru_lock()	preempt_disable()
#define bh_lru_unlock()	preempt_enable()
#endif

static inline void check_irqs_on(void)
{
#ifdef irqs_disabled
	BUG_ON(irqs_disabled());
#endif
}

/*
 * Install a buffer_head into this cpu's LRU.  If not already in the LRU, it is
 * inserted at the front, and the buffer_head at the back if any is evicted.
 * Or, if already in the LRU it is moved to the front.
 */
static void bh_lru_install(struct buffer_head *bh)
{
	struct buffer_head *evictee = bh;
	struct bh_lru *b;
	int i;

	check_irqs_on();
	bh_lru_lock();

	b = this_cpu_ptr(&bh_lrus);
	for (i = 0; i < BH_LRU_SIZE; i++) {
		swap(evictee, b->bhs[i]);
		if (evictee == bh) {
			bh_lru_unlock();
			return;
		}
	}

	get_bh(bh);
	bh_lru_unlock();
	brelse(evictee);
}

/*
 * Look up the bh in this cpu's LRU.  If it's there, move it to the head.
 */
static struct buffer_head *
lookup_bh_lru(struct block_device *bdev, sector_t block, unsigned size)
{
	struct buffer_head *ret = NULL;
	unsigned int i;

	check_irqs_on();
	bh_lru_lock();
	for (i = 0; i < BH_LRU_SIZE; i++) {
		struct buffer_head *bh = __this_cpu_read(bh_lrus.bhs[i]);

		if (bh && bh->b_blocknr == block && bh->b_bdev == bdev &&
		    bh->b_size == size) {
			if (i) {
				while (i) {
					__this_cpu_write(bh_lrus.bhs[i],
						__this_cpu_read(bh_lrus.bhs[i - 1]));
					i--;
				}
				__this_cpu_write(bh_lrus.bhs[0], bh);
			}
			get_bh(bh);
			ret = bh;
			break;
		}
	}
	bh_lru_unlock();
	return ret;
}

/*
 * Perform a pagecache lookup for the matching buffer.  If it's there, refresh
 * it in the LRU and mark it as accessed.  If it is not present then return
 * NULL.
 */
struct buffer_head *
__find_get_block(struct block_device *bdev, sector_t block, unsigned size)
{
	struct buffer_head *bh = lookup_bh_lru(bdev, block, size);

	if (bh == NULL) {
		/* __find_get_block_slow will mark the page accessed */
		bh = __find_get_block_slow(bdev, block);
		if (bh)
			bh_lru_install(bh);
	} else
		touch_buffer(bh);

	return bh;
}
EXPORT_SYMBOL(__find_get_block);

/*
 * __getblk_gfp() will locate (and, if necessary, create) the buffer_head
 * which corresponds to the passed block_device, block and size.  The
 * returned buffer has its reference count incremented.
 */
struct buffer_head *
__getblk_gfp(struct block_device *bdev, sector_t block,
	     unsigned size, gfp_t gfp)
{
	struct buffer_head *bh = __find_get_block(bdev, block, size);

	might_sleep();
	if (bh == NULL)
		bh = __getblk_slow(bdev, block, size, gfp);
	return bh;
}
EXPORT_SYMBOL(__getblk_gfp);

/*
 * Do async read-ahead on a buffer..
 */
void __breadahead(struct block_device *bdev, sector_t block, unsigned size)
{
	struct buffer_head *bh = __getblk(bdev, block, size);
	if (likely(bh)) {
		ll_rw_block(REQ_OP_READ, REQ_RAHEAD, 1, &bh);
		brelse(bh);
	}
}
EXPORT_SYMBOL(__breadahead);

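/**
 *  __bread_gfp() - reads a specified block and returns the bh
 *  @bdev: the block_device to read from
 *  @block: number of block
 *  @size: size (in bytes) to read
 *  @gfp: page allocation flag
 *
 *  Reads a specified block, and returns the buffer head that contains it.
 *  It returns NULL if the block was unreadable.
 *
 *  A minimal usage sketch, for illustration only ("sb" stands for any
 *  super_block the caller already holds):
 *
 *	struct buffer_head *bh = __bread_gfp(sb->s_bdev, blocknr,
 *					     sb->s_blocksize, __GFP_MOVABLE);
 *	if (bh) {
 *		... read bh->b_data ...
 *		brelse(bh);
 *	}
 *
 *  Most filesystems go through the sb_bread() wrapper rather than calling
 *  this directly.
 */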
struct buffer_head *
__bread_gfp(struct block_device *bdev, sector_t block,
		   unsigned size, gfp_t gfp)
{
	struct buffer_head *bh = __getblk_gfp(bdev, block, size, gfp);

	if (likely(bh) && !buffer_uptodate(bh))
		bh = __bread_slow(bh);
	return bh;
}
EXPORT_SYMBOL(__bread_gfp);

/*
 * invalidate_bh_lrus() is called rarely - but not only at unmount.
 * This doesn't race because it runs in each cpu either in irq
 * or with preempt disabled.
 */
static void invalidate_bh_lru(void *arg)
{
	struct bh_lru *b = &get_cpu_var(bh_lrus);
	int i;

	for (i = 0; i < BH_LRU_SIZE; i++) {
		brelse(b->bhs[i]);
		b->bhs[i] = NULL;
	}
	put_cpu_var(bh_lrus);
}

static bool has_bh_in_lru(int cpu, void *dummy)
{
	struct bh_lru *b = per_cpu_ptr(&bh_lrus, cpu);
	int i;

	for (i = 0; i < BH_LRU_SIZE; i++) {
		if (b->bhs[i])
			return 1;
	}

	return 0;
}

void invalidate_bh_lrus(void)
{
	on_each_cpu_cond(has_bh_in_lru, invalidate_bh_lru, NULL, 1, GFP_KERNEL);
}
EXPORT_SYMBOL_GPL(invalidate_bh_lrus);

void set_bh_page(struct buffer_head *bh,
		struct page *page, unsigned long offset)
{
	bh->b_page = page;
	BUG_ON(offset >= PAGE_SIZE);
	if (PageHighMem(page))
		/*
		 * This catches illegal uses and preserves the offset:
		 */
		bh->b_data = (char *)(0 + offset);
	else
		bh->b_data = page_address(page) + offset;
}
EXPORT_SYMBOL(set_bh_page);

/*
 * Called when truncating a buffer on a page completely.
 */

/* Bits that are cleared during an invalidate */
#define BUFFER_FLAGS_DISCARD \
	(1 << BH_Mapped | 1 << BH_New | 1 << BH_Req | \
	 1 << BH_Delay | 1 << BH_Unwritten)

static void discard_buffer(struct buffer_head *bh)
{
	unsigned long b_state, b_state_old;

	lock_buffer(bh);
	clear_buffer_dirty(bh);
	bh->b_bdev = NULL;
	b_state = bh->b_state;
	for (;;) {
		b_state_old = cmpxchg(&bh->b_state, b_state,
				      (b_state & ~BUFFER_FLAGS_DISCARD));
		if (b_state_old == b_state)
			break;
		b_state = b_state_old;
	}
	unlock_buffer(bh);
}

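/**
 * block_invalidatepage - invalidate part or all of a buffer-backed page
 * @page: the page which is affected
 * @offset: start of the range to invalidate
 * @length: length of the range to invalidate
 *
 * block_invalidatepage() is called when all or part of the page has become
 * invalidated by a truncate operation.
 *
 * block_invalidatepage() does not have to release all buffers, but it must
 * ensure that no dirty buffer is left outside @offset and that no I/O
 * is underway against any of the blocks which are outside the truncation
 * point.  Because the caller is about to free (and possibly reuse) those
 * blocks on-disk.
 */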
void block_invalidatepage(struct page *page, unsigned int offset,
			  unsigned int length)
{
	struct buffer_head *head, *bh, *next;
	unsigned int curr_off = 0;
	unsigned int stop = length + offset;

	BUG_ON(!PageLocked(page));
	if (!page_has_buffers(page))
		goto out;

	/*
	 * Check for overflow
	 */
	BUG_ON(stop > PAGE_SIZE || stop < length);

	head = page_buffers(page);
	bh = head;
	do {
		unsigned int next_off = curr_off + bh->b_size;
		next = bh->b_this_page;

		/*
		 * Are we still fully in range ?
		 */
		if (next_off > stop)
			goto out;

		/*
		 * is this block fully invalidated?
		 */
		if (offset <= curr_off)
			discard_buffer(bh);
		curr_off = next_off;
		bh = next;
	} while (bh != head);

	/*
	 * We release buffers only if the entire page is being invalidated.
	 * The get_block cached value has been unconditionally invalidated,
	 * so real IO is not possible anymore.
	 */
	if (length == PAGE_SIZE)
		try_to_release_page(page, 0);
out:
	return;
}
EXPORT_SYMBOL(block_invalidatepage);

/*
 * We attach and possibly dirty the buffers atomically wrt
 * __set_page_dirty_buffers() via private_lock.  try_to_free_buffers
 * is already excluded via the page lock.
 */
void create_empty_buffers(struct page *page,
			unsigned long blocksize, unsigned long b_state)
{
	struct buffer_head *bh, *head, *tail;

	head = alloc_page_buffers(page, blocksize, true);
	bh = head;
	do {
		bh->b_state |= b_state;
		tail = bh;
		bh = bh->b_this_page;
	} while (bh);
	tail->b_this_page = head;

	spin_lock(&page->mapping->private_lock);
	if (PageUptodate(page) || PageDirty(page)) {
		bh = head;
		do {
			if (PageDirty(page))
				set_buffer_dirty(bh);
			if (PageUptodate(page))
				set_buffer_uptodate(bh);
			bh = bh->b_this_page;
		} while (bh != head);
	}
	attach_page_buffers(page, head);
	spin_unlock(&page->mapping->private_lock);
}
EXPORT_SYMBOL(create_empty_buffers);

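/**
 * clean_bdev_aliases: clean a range of buffers in block device
 * @bdev: Block device to clean buffers in
 * @block: Start of a range of blocks to clean
 * @len: Number of blocks to clean
 *
 * We are taking a range of blocks for data and we don't want writeback of any
 * buffer-cache aliases starting from return from this function and until the
 * moment when something will explicitly mark the buffer dirty (hopefully that
 * will not happen until we will free that block ;-) We don't even need to mark
 * it not-uptodate - nobody can expect anything from a newly allocated buffer
 * anyway.
 *
 * Also..  Note that bforget() doesn't lock the buffer.  So there can be
 * writeout I/O going on against recently-freed buffers.  We don't wait on that
 * I/O in bforget() - it's more efficient to wait on the I/O only if we really
 * need to.  That happens here.
 */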
void clean_bdev_aliases(struct block_device *bdev, sector_t block, sector_t len)
{
	struct inode *bd_inode = bdev->bd_inode;
	struct address_space *bd_mapping = bd_inode->i_mapping;
	struct pagevec pvec;
	pgoff_t index = block >> (PAGE_SHIFT - bd_inode->i_blkbits);
	pgoff_t end;
	int i, count;
	struct buffer_head *bh;
	struct buffer_head *head;

	end = (block + len - 1) >> (PAGE_SHIFT - bd_inode->i_blkbits);
	pagevec_init(&pvec);
	while (pagevec_lookup_range(&pvec, bd_mapping, &index, end)) {
		count = pagevec_count(&pvec);
		for (i = 0; i < count; i++) {
			struct page *page = pvec.pages[i];

			if (!page_has_buffers(page))
				continue;
			/*
			 * We use page lock instead of bd_mapping->private_lock
			 * to pin buffers here since we can afford to sleep and
			 * it scales better than a global spinlock lock.
			 */
			lock_page(page);
			/* Recheck when the page is locked which pins bhs */
			if (!page_has_buffers(page))
				goto unlock_page;
			head = page_buffers(page);
			bh = head;
			do {
				if (!buffer_mapped(bh) || (bh->b_blocknr < block))
					goto next;
				if (bh->b_blocknr >= block + len)
					break;
				clear_buffer_dirty(bh);
				wait_on_buffer(bh);
				clear_buffer_req(bh);
next:
				bh = bh->b_this_page;
			} while (bh != head);
unlock_page:
			unlock_page(page);
		}
		pagevec_release(&pvec);
		cond_resched();
		/* End of range already reached? */
		if (index > end || !index)
			break;
	}
}
EXPORT_SYMBOL(clean_bdev_aliases);

/*
 * Size is a power-of-two in the range 512..PAGE_SIZE,
 * and the case we care about most is PAGE_SIZE.
 *
 * So this *could* possibly be written with those
 * constraints in mind (relevant mostly if some
 * architecture has a slow bit-scan instruction)
 */
static inline int block_size_bits(unsigned int blocksize)
{
	return ilog2(blocksize);
}

static struct buffer_head *create_page_buffers(struct page *page, struct inode *inode, unsigned int b_state)
{
	BUG_ON(!PageLocked(page));

	if (!page_has_buffers(page))
		create_empty_buffers(page, 1 << READ_ONCE(inode->i_blkbits),
				     b_state);
	return page_buffers(page);
}

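/*
 * NOTE! All mapped/uptodate combinations are valid:
 *
 *	Mapped	Uptodate	Meaning
 *
 *	No	No		"unknown" - must do get_block()
 *	No	Yes		"hole" - zero-filled
 *	Yes	No		"allocated" - allocated on disk, not read in
 *	Yes	Yes		"valid" - allocated and up-to-date in memory.
 *
 * "Dirty" is valid only with the last case (mapped+uptodate).
 *
 * While block_write_full_page is writing back the dirty buffers under
 * the page lock, whoever dirtied the buffers may decide to clean them
 * again at any time.  Only the buffers that are dirty at the moment we
 * inspect them are written back; buffers dirtied afterwards are left
 * for a later writeback pass.
 */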
int __block_write_full_page(struct inode *inode, struct page *page,
			get_block_t *get_block, struct writeback_control *wbc,
			bh_end_io_t *handler)
{
	int err;
	sector_t block;
	sector_t last_block;
	struct buffer_head *bh, *head;
	unsigned int blocksize, bbits;
	int nr_underway = 0;
	int write_flags = wbc_to_write_flags(wbc);

	head = create_page_buffers(page, inode,
					(1 << BH_Dirty)|(1 << BH_Uptodate));

	/*
	 * Be very careful.  We have no exclusion from __set_page_dirty_buffers
	 * here, and the (potentially unmapped) buffers may become dirty at
	 * any time.  If a buffer becomes dirty here after we've inspected it
	 * then we just miss that fact, and the page stays dirty.
	 *
	 * Buffers outside i_size may be dirtied by __set_page_dirty_buffers;
	 * handle that here by just cleaning them.
	 */
	bh = head;
	blocksize = bh->b_size;
	bbits = block_size_bits(blocksize);

	block = (sector_t)page->index << (PAGE_SHIFT - bbits);
	last_block = (i_size_read(inode) - 1) >> bbits;

	/*
	 * Get all the dirty buffers mapped to disk addresses and
	 * handle any aliases from the underlying blockdev's mapping.
	 */
	do {
		if (block > last_block) {
			/*
			 * mapped buffers outside i_size will occur, because
			 * this page can be outside i_size when there is a
			 * truncate in progress.
			 *
			 * The buffer was zeroed by block_write_full_page().
			 */
			clear_buffer_dirty(bh);
			set_buffer_uptodate(bh);
		} else if ((!buffer_mapped(bh) || buffer_delay(bh)) &&
			   buffer_dirty(bh)) {
			WARN_ON(bh->b_size != blocksize);
			err = get_block(inode, block, bh, 1);
			if (err)
				goto recover;
			clear_buffer_delay(bh);
			if (buffer_new(bh)) {
				/* blockdev mappings never come here */
				clear_buffer_new(bh);
				clean_bdev_bh_alias(bh);
			}
		}
		bh = bh->b_this_page;
		block++;
	} while (bh != head);

	do {
		if (!buffer_mapped(bh))
			continue;
		/*
		 * If it's a fully non-blocking write attempt and we cannot
		 * lock the buffer then redirty the page.  Note that this can
		 * potentially cause a busy-wait loop from writeback threads
		 * and kswapd activity, but those code paths have their own
		 * higher-level throttling.
		 */
		if (wbc->sync_mode != WB_SYNC_NONE) {
			lock_buffer(bh);
		} else if (!trylock_buffer(bh)) {
			redirty_page_for_writepage(wbc, page);
			continue;
		}
		if (test_clear_buffer_dirty(bh)) {
			mark_buffer_async_write_endio(bh, handler);
		} else {
			unlock_buffer(bh);
		}
	} while ((bh = bh->b_this_page) != head);

	/*
	 * The page and its buffers are protected by PageWriteback(), so we can
	 * drop the bh refcounts early.
	 */
	BUG_ON(PageWriteback(page));
	set_page_writeback(page);

	do {
		struct buffer_head *next = bh->b_this_page;
		if (buffer_async_write(bh)) {
			submit_bh_wbc(REQ_OP_WRITE, write_flags, bh,
					inode->i_write_hint, wbc);
			nr_underway++;
		}
		bh = next;
	} while (bh != head);
	unlock_page(page);

	err = 0;
done:
	if (nr_underway == 0) {
		/*
		 * The page was marked dirty, but the buffers were
		 * clean.  Someone wrote them back by hand with
		 * ll_rw_block/submit_bh.  A rare case.
		 */
		end_page_writeback(page);

		/*
		 * The page and buffer_heads can be released at any time from
		 * here on.
		 */
	}
	return err;

recover:
	/*
	 * ENOSPC, or some other error.  We may already have added some
	 * blocks to the file, so we need to write these out to avoid
	 * exposing stale data.  The page is currently locked and not
	 * marked for writeback.
	 */
	bh = head;
	/* Recovery: lock and submit the mapped buffers */
	do {
		if (buffer_mapped(bh) && buffer_dirty(bh) &&
		    !buffer_delay(bh)) {
			lock_buffer(bh);
			mark_buffer_async_write_endio(bh, handler);
		} else {
			/*
			 * The buffer may have been set dirty during
			 * attachment to a dirty page.
			 */
			clear_buffer_dirty(bh);
		}
	} while ((bh = bh->b_this_page) != head);
	SetPageError(page);
	BUG_ON(PageWriteback(page));
	mapping_set_error(page->mapping, err);
	set_page_writeback(page);
	do {
		struct buffer_head *next = bh->b_this_page;
		if (buffer_async_write(bh)) {
			clear_buffer_dirty(bh);
			submit_bh_wbc(REQ_OP_WRITE, write_flags, bh,
					inode->i_write_hint, wbc);
			nr_underway++;
		}
		bh = next;
	} while (bh != head);
	unlock_page(page);
	goto done;
}
EXPORT_SYMBOL(__block_write_full_page);

/*
 * If a page has any new buffers, zero them out here, and mark them uptodate
 * and dirty so they'll be written out (in order to prevent uninitialised
 * block data from leaking).  And clear the new bit.
 */
void page_zero_new_buffers(struct page *page, unsigned from, unsigned to)
{
	unsigned int block_start, block_end;
	struct buffer_head *head, *bh;

	BUG_ON(!PageLocked(page));
	if (!page_has_buffers(page))
		return;

	bh = head = page_buffers(page);
	block_start = 0;
	do {
		block_end = block_start + bh->b_size;

		if (buffer_new(bh)) {
			if (block_end > from && block_start < to) {
				if (!PageUptodate(page)) {
					unsigned start, size;

					start = max(from, block_start);
					size = min(to, block_end) - start;

					zero_user(page, start, size);
					set_buffer_uptodate(bh);
				}

				clear_buffer_new(bh);
				mark_buffer_dirty(bh);
			}
		}

		block_start = block_end;
		bh = bh->b_this_page;
	} while (bh != head);
}
EXPORT_SYMBOL(page_zero_new_buffers);

static void
iomap_to_bh(struct inode *inode, sector_t block, struct buffer_head *bh,
		struct iomap *iomap)
{
	loff_t offset = block << inode->i_blkbits;

	bh->b_bdev = iomap->bdev;

	/*
	 * Block points to offset in file we need to map, iomap contains
	 * the offset at which the map starts. If the map ends before the
	 * current block, then do not map the buffer and let the caller
	 * handle it.
	 */
	BUG_ON(offset >= iomap->offset + iomap->length);

	switch (iomap->type) {
	case IOMAP_HOLE:
		/*
		 * If the buffer is not up to date or beyond the current EOF,
		 * we need to mark it as new to ensure sub-block zeroing is
		 * executed if necessary.
		 */
		if (!buffer_uptodate(bh) ||
		    (offset >= i_size_read(inode)))
			set_buffer_new(bh);
		break;
	case IOMAP_DELALLOC:
		if (!buffer_uptodate(bh) ||
		    (offset >= i_size_read(inode)))
			set_buffer_new(bh);
		set_buffer_uptodate(bh);
		set_buffer_mapped(bh);
		set_buffer_delay(bh);
		break;
	case IOMAP_UNWRITTEN:
		/*
		 * For unwritten regions, we always need to ensure that regions
		 * in the block we are not writing to are zeroed. Mark the
		 * buffer as new to ensure this.
		 */
		set_buffer_new(bh);
		set_buffer_unwritten(bh);
		/* fall through */
	case IOMAP_MAPPED:
		if (offset >= i_size_read(inode))
			set_buffer_new(bh);
		bh->b_blocknr = (iomap->addr + offset - iomap->offset) >>
				inode->i_blkbits;
		set_buffer_mapped(bh);
		break;
	}
}

int __block_write_begin_int(struct page *page, loff_t pos, unsigned len,
		get_block_t *get_block, struct iomap *iomap)
{
	unsigned from = pos & (PAGE_SIZE - 1);
	unsigned to = from + len;
	struct inode *inode = page->mapping->host;
	unsigned block_start, block_end;
	sector_t block;
	int err = 0;
	unsigned blocksize, bbits;
	struct buffer_head *bh, *head, *wait[2], **wait_bh = wait;

	BUG_ON(!PageLocked(page));
	BUG_ON(from > PAGE_SIZE);
	BUG_ON(to > PAGE_SIZE);
	BUG_ON(from > to);

	head = create_page_buffers(page, inode, 0);
	blocksize = head->b_size;
	bbits = block_size_bits(blocksize);

	block = (sector_t)page->index << (PAGE_SHIFT - bbits);

	for (bh = head, block_start = 0; bh != head || !block_start;
	    block++, block_start = block_end, bh = bh->b_this_page) {
		block_end = block_start + blocksize;
		if (block_end <= from || block_start >= to) {
			if (PageUptodate(page)) {
				if (!buffer_uptodate(bh))
					set_buffer_uptodate(bh);
			}
			continue;
		}
		if (buffer_new(bh))
			clear_buffer_new(bh);
		if (!buffer_mapped(bh)) {
			WARN_ON(bh->b_size != blocksize);
			if (get_block) {
				err = get_block(inode, block, bh, 1);
				if (err)
					break;
			} else {
				iomap_to_bh(inode, block, bh, iomap);
			}

			if (buffer_new(bh)) {
				clean_bdev_bh_alias(bh);
				if (PageUptodate(page)) {
					clear_buffer_new(bh);
					set_buffer_uptodate(bh);
					mark_buffer_dirty(bh);
					continue;
				}
				if (block_end > to || block_start < from)
					zero_user_segments(page,
						to, block_end,
						block_start, from);
				continue;
			}
		}
		if (PageUptodate(page)) {
			if (!buffer_uptodate(bh))
				set_buffer_uptodate(bh);
			continue;
		}
		if (!buffer_uptodate(bh) && !buffer_delay(bh) &&
		    !buffer_unwritten(bh) &&
		    (block_start < from || block_end > to)) {
			ll_rw_block(REQ_OP_READ, 0, 1, &bh);
			*wait_bh++ = bh;
		}
	}
	/*
	 * If we issued read requests - let them complete.
	 */
	while (wait_bh > wait) {
		wait_on_buffer(*--wait_bh);
		if (!buffer_uptodate(*wait_bh))
			err = -EIO;
	}
	if (unlikely(err))
		page_zero_new_buffers(page, from, to);
	return err;
}

int __block_write_begin(struct page *page, loff_t pos, unsigned len,
		get_block_t *get_block)
{
	return __block_write_begin_int(page, pos, len, get_block, NULL);
}
EXPORT_SYMBOL(__block_write_begin);

static int __block_commit_write(struct inode *inode, struct page *page,
		unsigned from, unsigned to)
{
	unsigned block_start, block_end;
	int partial = 0;
	unsigned blocksize;
	struct buffer_head *bh, *head;

	bh = head = page_buffers(page);
	blocksize = bh->b_size;

	block_start = 0;
	do {
		block_end = block_start + blocksize;
		if (block_end <= from || block_start >= to) {
			if (!buffer_uptodate(bh))
				partial = 1;
		} else {
			set_buffer_uptodate(bh);
			mark_buffer_dirty(bh);
		}
		clear_buffer_new(bh);

		block_start = block_end;
		bh = bh->b_this_page;
	} while (bh != head);

	/*
	 * If this is a partial write which happened to make all buffers
	 * uptodate then we can optimize away a bogus readpage() for
	 * the next read().  Here we 'discover' whether the page went
	 * uptodate as a result of this (potentially partial) write.
	 */
	if (!partial)
		SetPageUptodate(page);
	return 0;
}

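/*
 * block_write_begin takes care of the basic task of block allocation and
 * bringing partial write blocks uptodate first.
 *
 * The filesystem needs to handle block truncation upon failure.
 *
 * A typical ->write_begin hook, sketched for illustration only
 * ("myfs_get_block" is a hypothetical filesystem block-mapping callback):
 *
 *	static int myfs_write_begin(struct file *file,
 *			struct address_space *mapping, loff_t pos,
 *			unsigned len, unsigned flags,
 *			struct page **pagep, void **fsdata)
 *	{
 *		return block_write_begin(mapping, pos, len, flags, pagep,
 *					 myfs_get_block);
 *	}
 *
 * The matching ->write_end hook would then usually call
 * generic_write_end() (or block_write_end() plus its own bookkeeping).
 */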
int block_write_begin(struct address_space *mapping, loff_t pos, unsigned len,
		unsigned flags, struct page **pagep, get_block_t *get_block)
{
	pgoff_t index = pos >> PAGE_SHIFT;
	struct page *page;
	int status;

	page = grab_cache_page_write_begin(mapping, index, flags);
	if (!page)
		return -ENOMEM;

	status = __block_write_begin(page, pos, len, get_block);
	if (unlikely(status)) {
		unlock_page(page);
		put_page(page);
		page = NULL;
	}

	*pagep = page;
	return status;
}
EXPORT_SYMBOL(block_write_begin);

int block_write_end(struct file *file, struct address_space *mapping,
			loff_t pos, unsigned len, unsigned copied,
			struct page *page, void *fsdata)
{
	struct inode *inode = mapping->host;
	unsigned start;

	start = pos & (PAGE_SIZE - 1);

	if (unlikely(copied < len)) {
		/*
		 * The buffers that were written will now be uptodate, so we
		 * don't have to worry about a readpage reading them and
		 * overwriting a partial write. However if we have encountered
		 * a short write and only partially written into a buffer, it
		 * will not be marked uptodate, so a readpage might come in and
		 * destroy our partial write.
		 *
		 * Do the simplest thing, and just treat any short write to a
		 * non uptodate page as a zero-length write, and force the
		 * caller to redo the whole thing.
		 */
		if (!PageUptodate(page))
			copied = 0;

		page_zero_new_buffers(page, start+copied, start+len);
	}
	flush_dcache_page(page);

	/* This could be a short (even 0-length) commit */
	__block_commit_write(inode, page, start, start+copied);

	return copied;
}
EXPORT_SYMBOL(block_write_end);

int generic_write_end(struct file *file, struct address_space *mapping,
			loff_t pos, unsigned len, unsigned copied,
			struct page *page, void *fsdata)
{
	struct inode *inode = mapping->host;
	loff_t old_size = inode->i_size;
	int i_size_changed = 0;

	copied = block_write_end(file, mapping, pos, len, copied, page, fsdata);

	/*
	 * No need to use i_size_read() here, the i_size cannot change under us
	 * because we hold i_rwsem.
	 *
	 * But it's important to update i_size while still holding page lock:
	 * page writeout could otherwise come in and zero beyond i_size.
	 */
	if (pos+copied > inode->i_size) {
		i_size_write(inode, pos+copied);
		i_size_changed = 1;
	}

	unlock_page(page);
	put_page(page);

	if (old_size < pos)
		pagecache_isize_extended(inode, old_size, pos);
	/*
	 * Don't mark the inode dirty under page lock. First, it unnecessarily
	 * makes the holding time of page lock longer. Second, it forces lock
	 * ordering of page lock and transaction start for journaling
	 * filesystems.
	 */
	if (i_size_changed)
		mark_inode_dirty(inode);

	return copied;
}
EXPORT_SYMBOL(generic_write_end);

/*
 * block_is_partially_uptodate checks whether buffers within a page are
 * uptodate or not.
 *
 * Returns true if all buffers which correspond to a file portion
 * we want to read are uptodate.
 */
int block_is_partially_uptodate(struct page *page, unsigned long from,
					unsigned long count)
{
	unsigned block_start, block_end, blocksize;
	unsigned to;
	struct buffer_head *bh, *head;
	int ret = 1;

	if (!page_has_buffers(page))
		return 0;

	head = page_buffers(page);
	blocksize = head->b_size;
	to = min_t(unsigned, PAGE_SIZE - from, count);
	to = from + to;
	if (from < blocksize && to > PAGE_SIZE - blocksize)
		return 0;

	bh = head;
	block_start = 0;
	do {
		block_end = block_start + blocksize;
		if (block_end > from && block_start < to) {
			if (!buffer_uptodate(bh)) {
				ret = 0;
				break;
			}
			if (block_end >= to)
				break;
		}
		block_start = block_end;
		bh = bh->b_this_page;
	} while (bh != head);

	return ret;
}
EXPORT_SYMBOL(block_is_partially_uptodate);

/*
 * Generic "read page" function for block files.  Maps the blocks in the
 * page, zero-fills any holes, and reads the remaining blocks
 * asynchronously; end_buffer_async_read() completes the page.
 */
int block_read_full_page(struct page *page, get_block_t *get_block)
{
	struct inode *inode = page->mapping->host;
	sector_t iblock, lblock;
	struct buffer_head *bh, *head, *arr[MAX_BUF_PER_PAGE];
	unsigned int blocksize, bbits;
	int nr, i;
	int fully_mapped = 1;

	head = create_page_buffers(page, inode, 0);
	blocksize = head->b_size;
	bbits = block_size_bits(blocksize);

	iblock = (sector_t)page->index << (PAGE_SHIFT - bbits);
	lblock = (i_size_read(inode)+blocksize-1) >> bbits;
	bh = head;
	nr = 0;
	i = 0;

	do {
		if (buffer_uptodate(bh))
			continue;

		if (!buffer_mapped(bh)) {
			int err = 0;

			fully_mapped = 0;
			if (iblock < lblock) {
				WARN_ON(bh->b_size != blocksize);
				err = get_block(inode, iblock, bh, 0);
				if (err)
					SetPageError(page);
			}
			if (!buffer_mapped(bh)) {
				zero_user(page, i * blocksize, blocksize);
				if (!err)
					set_buffer_uptodate(bh);
				continue;
			}
			/*
			 * get_block() might have updated the buffer
			 * synchronously
			 */
			if (buffer_uptodate(bh))
				continue;
		}
		arr[nr++] = bh;
	} while (i++, iblock++, (bh = bh->b_this_page) != head);

	if (fully_mapped)
		SetPageMappedToDisk(page);

	if (!nr) {
		/*
		 * All buffers are uptodate - we can set the page uptodate
		 * as well. But not if get_block() returned an error.
		 */
		if (!PageError(page))
			SetPageUptodate(page);
		unlock_page(page);
		return 0;
	}

	/* Stage two: lock the buffers */
	for (i = 0; i < nr; i++) {
		bh = arr[i];
		lock_buffer(bh);
		mark_buffer_async_read(bh);
	}

	/*
	 * Stage 3: start the IO.  Check for uptodateness
	 * inside the buffer lock in case another process reading
	 * the underlying blockdev brought it uptodate (the sct fix).
	 */
	for (i = 0; i < nr; i++) {
		bh = arr[i];
		if (buffer_uptodate(bh))
			end_buffer_async_read(bh, 1);
		else
			submit_bh(REQ_OP_READ, 0, bh);
	}
	return 0;
}
EXPORT_SYMBOL(block_read_full_page);

/*
 * utility function for filesystems that need to do work on expanding
 * truncates.  Uses filesystem pagecache writes to allow the filesystem to
 * deal with the hole.
 */
int generic_cont_expand_simple(struct inode *inode, loff_t size)
{
	struct address_space *mapping = inode->i_mapping;
	struct page *page;
	void *fsdata;
	int err;

	err = inode_newsize_ok(inode, size);
	if (err)
		goto out;

	err = pagecache_write_begin(NULL, mapping, size, 0,
				    AOP_FLAG_CONT_EXPAND, &page, &fsdata);
	if (err)
		goto out;

	err = pagecache_write_end(NULL, mapping, size, 0, 0, page, fsdata);
	BUG_ON(err > 0);

out:
	return err;
}
EXPORT_SYMBOL(generic_cont_expand_simple);

static int cont_expand_zero(struct file *file, struct address_space *mapping,
			    loff_t pos, loff_t *bytes)
{
	struct inode *inode = mapping->host;
	unsigned int blocksize = i_blocksize(inode);
	struct page *page;
	void *fsdata;
	pgoff_t index, curidx;
	loff_t curpos;
	unsigned zerofrom, offset, len;
	int err = 0;

	index = pos >> PAGE_SHIFT;
	offset = pos & ~PAGE_MASK;

	while (index > (curidx = (curpos = *bytes)>>PAGE_SHIFT)) {
		zerofrom = curpos & ~PAGE_MASK;
		if (zerofrom & (blocksize-1)) {
			*bytes |= (blocksize-1);
			(*bytes)++;
		}
		len = PAGE_SIZE - zerofrom;

		err = pagecache_write_begin(file, mapping, curpos, len, 0,
					    &page, &fsdata);
		if (err)
			goto out;
		zero_user(page, zerofrom, len);
		err = pagecache_write_end(file, mapping, curpos, len, len,
						page, fsdata);
		if (err < 0)
			goto out;
		BUG_ON(err != len);
		err = 0;

		balance_dirty_pages_ratelimited(mapping);

		if (unlikely(fatal_signal_pending(current))) {
			err = -EINTR;
			goto out;
		}
	}

	/* page covers the boundary, find the boundary offset */
	if (index == curidx) {
		zerofrom = curpos & ~PAGE_MASK;
		/* if we will expand the thing last block will be filled */
		if (offset <= zerofrom) {
			goto out;
		}
		if (zerofrom & (blocksize-1)) {
			*bytes |= (blocksize-1);
			(*bytes)++;
		}
		len = offset - zerofrom;

		err = pagecache_write_begin(file, mapping, curpos, len, 0,
					    &page, &fsdata);
		if (err)
			goto out;
		zero_user(page, zerofrom, len);
		err = pagecache_write_end(file, mapping, curpos, len, len,
						page, fsdata);
		if (err < 0)
			goto out;
		BUG_ON(err != len);
		err = 0;
	}
out:
	return err;
}

/*
 * For moronic filesystems that do not allow holes in file.
 * We may have to extend the file.
 */
int cont_write_begin(struct file *file, struct address_space *mapping,
			loff_t pos, unsigned len, unsigned flags,
			struct page **pagep, void **fsdata,
			get_block_t *get_block, loff_t *bytes)
{
	struct inode *inode = mapping->host;
	unsigned int blocksize = i_blocksize(inode);
	unsigned int zerofrom;
	int err;

	err = cont_expand_zero(file, mapping, pos, bytes);
	if (err)
		return err;

	zerofrom = *bytes & ~PAGE_MASK;
	if (pos+len > *bytes && zerofrom & (blocksize-1)) {
		*bytes |= (blocksize-1);
		(*bytes)++;
	}

	return block_write_begin(mapping, pos, len, flags, pagep, get_block);
}
EXPORT_SYMBOL(cont_write_begin);

int block_commit_write(struct page *page, unsigned from, unsigned to)
{
	struct inode *inode = page->mapping->host;
	__block_commit_write(inode, page, from, to);
	return 0;
}
EXPORT_SYMBOL(block_commit_write);

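/*
 * block_page_mkwrite() is not allowed to change the file size as it gets
 * called from a page fault handler when a page is first dirtied. Hence we must
 * be careful to check for EOF conditions here. We set the page up correctly
 * for a written page which means we get ENOSPC checking when writing into
 * holes and correct delalloc and unwritten extent mapping on filesystems that
 * support these features.
 *
 * We are not allowed to take the i_mutex here so we have to play games to
 * protect against truncate races as the page could now be beyond EOF.  Because
 * truncate writes the inode size before removing pages, once we have the
 * page lock we can determine safely if the page is beyond EOF. If it is not
 * beyond EOF, then the page is guaranteed safe against truncation until we
 * unlock the page.
 *
 * Direct callers of this function should protect against filesystem freezing
 * using sb_start_pagefault() - sb_end_pagefault() functions.
 */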
int block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
			 get_block_t get_block)
{
	struct page *page = vmf->page;
	struct inode *inode = file_inode(vma->vm_file);
	unsigned long end;
	loff_t size;
	int ret;

	lock_page(page);
	size = i_size_read(inode);
	if ((page->mapping != inode->i_mapping) ||
	    (page_offset(page) > size)) {
		/* We overload EFAULT to mean page got truncated */
		ret = -EFAULT;
		goto out_unlock;
	}

	/* page is wholly or partially inside EOF */
	if (((page->index + 1) << PAGE_SHIFT) > size)
		end = size & ~PAGE_MASK;
	else
		end = PAGE_SIZE;

	ret = __block_write_begin(page, 0, end, get_block);
	if (!ret)
		ret = block_commit_write(page, 0, end);

	if (unlikely(ret < 0))
		goto out_unlock;
	set_page_dirty(page);
	wait_for_stable_page(page);
	return 0;
out_unlock:
	unlock_page(page);
	return ret;
}
EXPORT_SYMBOL(block_page_mkwrite);

/*
 * nobh_write_begin()'s prereads are special: the buffer_heads are freed
 * immediately, while under the page lock.  So it needs a special end_io
 * handler which does not touch the bh after unlocking it.
 */
static void end_buffer_read_nobh(struct buffer_head *bh, int uptodate)
{
	__end_buffer_read_notouch(bh, uptodate);
}

/*
 * Attach the singly-linked list of buffers created by nobh_write_begin, to
 * the page (converting it to circular linked list and taking care of page
 * dirty races).
 */
static void attach_nobh_buffers(struct page *page, struct buffer_head *head)
{
	struct buffer_head *bh;

	BUG_ON(!PageLocked(page));

	spin_lock(&page->mapping->private_lock);
	bh = head;
	do {
		if (PageDirty(page))
			set_buffer_dirty(bh);
		if (!bh->b_this_page)
			bh->b_this_page = head;
		bh = bh->b_this_page;
	} while (bh != head);
	attach_page_buffers(page, head);
	spin_unlock(&page->mapping->private_lock);
}

/*
 * On entry, the page is fully not uptodate.
 * On exit the page is fully uptodate in the areas outside (from,to)
 * The filesystem needs to handle block truncation upon failure.
 */
int nobh_write_begin(struct address_space *mapping,
			loff_t pos, unsigned len, unsigned flags,
			struct page **pagep, void **fsdata,
			get_block_t *get_block)
{
	struct inode *inode = mapping->host;
	const unsigned blkbits = inode->i_blkbits;
	const unsigned blocksize = 1 << blkbits;
	struct buffer_head *head, *bh;
	struct page *page;
	pgoff_t index;
	unsigned from, to;
	unsigned block_in_page;
	unsigned block_start, block_end;
	sector_t block_in_file;
	int nr_reads = 0;
	int ret = 0;
	int is_mapped_to_disk = 1;

	index = pos >> PAGE_SHIFT;
	from = pos & (PAGE_SIZE - 1);
	to = from + len;

	page = grab_cache_page_write_begin(mapping, index, flags);
	if (!page)
		return -ENOMEM;
	*pagep = page;
	*fsdata = NULL;

	if (page_has_buffers(page)) {
		ret = __block_write_begin(page, pos, len, get_block);
		if (unlikely(ret))
			goto out_release;
		return ret;
	}

	if (PageMappedToDisk(page))
		return 0;

	/*
	 * Allocate buffers so that we can keep track of state, and potentially
	 * attach them to the page if an error occurs. In the common case of
	 * no error, they will just be freed again without ever being attached
	 * to the page (which is all OK, because we're under the page lock).
	 *
	 * Be careful: the buffer linked list is a NULL terminated one, rather
	 * than the circular one we're used to.
	 */
	head = alloc_page_buffers(page, blocksize, false);
	if (!head) {
		ret = -ENOMEM;
		goto out_release;
	}

	block_in_file = (sector_t)page->index << (PAGE_SHIFT - blkbits);

	/*
	 * We loop across all blocks in the page, whether or not they are
	 * part of the affected region.  This is so we can discover if the
	 * page is fully mapped-to-disk.
	 */
	for (block_start = 0, block_in_page = 0, bh = head;
	     block_start < PAGE_SIZE;
	     block_in_page++, block_start += blocksize, bh = bh->b_this_page) {
		int create;

		block_end = block_start + blocksize;
		bh->b_state = 0;
		create = 1;
		if (block_start >= to)
			create = 0;
		ret = get_block(inode, block_in_file + block_in_page,
					bh, create);
		if (ret)
			goto failed;
		if (!buffer_mapped(bh))
			is_mapped_to_disk = 0;
		if (buffer_new(bh))
			clean_bdev_bh_alias(bh);
		if (PageUptodate(page)) {
			set_buffer_uptodate(bh);
			continue;
		}
		if (buffer_new(bh) || !buffer_mapped(bh)) {
			zero_user_segments(page, block_start, from,
							to, block_end);
			continue;
		}
		if (buffer_uptodate(bh))
			continue;
		if (block_start < from || block_end > to) {
			lock_buffer(bh);
			bh->b_end_io = end_buffer_read_nobh;
			submit_bh(REQ_OP_READ, 0, bh);
			nr_reads++;
		}
	}

	if (nr_reads) {
		/*
		 * The page is locked, so these buffers are protected from
		 * any VM or truncate activity.  Hence we don't need to care
		 * for the buffer_head refcounts.
		 */
		for (bh = head; bh; bh = bh->b_this_page) {
			wait_on_buffer(bh);
			if (!buffer_uptodate(bh))
				ret = -EIO;
		}
		if (ret)
			goto failed;
	}

	if (is_mapped_to_disk)
		SetPageMappedToDisk(page);

	*fsdata = head; /* to be released by nobh_write_end */

	return 0;

failed:
	BUG_ON(!ret);
	/*
	 * Error recovery is a bit difficult. We need to zero out blocks that
	 * were newly allocated, and dirty them to ensure they get written out.
	 * Buffers need to be attached to the page at this point, otherwise
	 * the handling of potential IO errors during writeout would be hard
	 * (could try doing synchronous writeout, but what if that fails too?)
	 */
	attach_nobh_buffers(page, head);
	page_zero_new_buffers(page, from, to);

out_release:
	unlock_page(page);
	put_page(page);
	*pagep = NULL;

	return ret;
}
EXPORT_SYMBOL(nobh_write_begin);

int nobh_write_end(struct file *file, struct address_space *mapping,
			loff_t pos, unsigned len, unsigned copied,
			struct page *page, void *fsdata)
{
	struct inode *inode = page->mapping->host;
	struct buffer_head *head = fsdata;
	struct buffer_head *bh;
	BUG_ON(fsdata != NULL && page_has_buffers(page));

	if (unlikely(copied < len) && head)
		attach_nobh_buffers(page, head);
	if (page_has_buffers(page))
		return generic_write_end(file, mapping, pos, len,
					copied, page, fsdata);

	SetPageUptodate(page);
	set_page_dirty(page);
	if (pos+copied > inode->i_size) {
		i_size_write(inode, pos+copied);
		mark_inode_dirty(inode);
	}

	unlock_page(page);
	put_page(page);

	while (head) {
		bh = head;
		head = head->b_this_page;
		free_buffer_head(bh);
	}

	return copied;
}
EXPORT_SYMBOL(nobh_write_end);

/*
 * nobh_writepage() - based on block_full_write_page() except
 * that it tries to operate without attaching bufferheads to
 * the page.
 */
int nobh_writepage(struct page *page, get_block_t *get_block,
			struct writeback_control *wbc)
{
	struct inode * const inode = page->mapping->host;
	loff_t i_size = i_size_read(inode);
	const pgoff_t end_index = i_size >> PAGE_SHIFT;
	unsigned offset;
	int ret;

	/* Is the page fully inside i_size? */
	if (page->index < end_index)
		goto out;

	/* Is the page fully outside i_size? (truncate in progress) */
	offset = i_size & (PAGE_SIZE-1);
	if (page->index >= end_index+1 || !offset) {
		/*
		 * The page may have dirty, unmapped buffers.  Make them
		 * freeable here, so the page does not leak.
		 */
#if 0
		/* Not really sure about this - do we need this ? */
		if (page->mapping->a_ops->invalidatepage)
			page->mapping->a_ops->invalidatepage(page, offset);
#endif
		unlock_page(page);
		return 0; /* don't care */
	}

	/*
	 * The page straddles i_size.  It must be zeroed out on each and every
	 * writepage invocation because it may be mmapped.  "A file is mapped
	 * in multiples of the page size.  For a file that is not a multiple of
	 * the page size, the remaining memory is zeroed when mapped, and
	 * writes to that region are not written out to the file."
	 */
	zero_user_segment(page, offset, PAGE_SIZE);
out:
	ret = mpage_writepage(page, get_block, wbc);
	if (ret == -EAGAIN)
		ret = __block_write_full_page(inode, page, get_block, wbc,
					      end_buffer_async_write);
	return ret;
}
EXPORT_SYMBOL(nobh_writepage);

int nobh_truncate_page(struct address_space *mapping,
			loff_t from, get_block_t *get_block)
{
	pgoff_t index = from >> PAGE_SHIFT;
	unsigned offset = from & (PAGE_SIZE-1);
	unsigned blocksize;
	sector_t iblock;
	unsigned length, pos;
	struct inode *inode = mapping->host;
	struct page *page;
	struct buffer_head map_bh;
	int err;

	blocksize = i_blocksize(inode);
	length = offset & (blocksize - 1);

	/* Block boundary? Nothing to do */
	if (!length)
		return 0;

	length = blocksize - length;
	iblock = (sector_t)index << (PAGE_SHIFT - inode->i_blkbits);

	page = grab_cache_page(mapping, index);
	err = -ENOMEM;
	if (!page)
		goto out;

	if (page_has_buffers(page)) {
has_buffers:
		unlock_page(page);
		put_page(page);
		return block_truncate_page(mapping, from, get_block);
	}

	/* Find the buffer that contains "offset" */
	pos = blocksize;
	while (offset >= pos) {
		iblock++;
		pos += blocksize;
	}

	map_bh.b_size = blocksize;
	map_bh.b_state = 0;
	err = get_block(inode, iblock, &map_bh, 0);
	if (err)
		goto unlock;
	/* unmapped? It's a hole - nothing to do */
	if (!buffer_mapped(&map_bh))
		goto unlock;

	/* Ok, it's mapped. Make sure it's up-to-date */
	if (!PageUptodate(page)) {
		err = mapping->a_ops->readpage(NULL, page);
		if (err) {
			put_page(page);
			goto out;
		}
		lock_page(page);
		if (!PageUptodate(page)) {
			err = -EIO;
			goto unlock;
		}
		if (page_has_buffers(page))
			goto has_buffers;
	}
	zero_user(page, offset, length);
	set_page_dirty(page);
	err = 0;

unlock:
	unlock_page(page);
	put_page(page);
out:
	return err;
}
EXPORT_SYMBOL(nobh_truncate_page);
2830
2831int block_truncate_page(struct address_space *mapping,
2832 loff_t from, get_block_t *get_block)
2833{
2834 pgoff_t index = from >> PAGE_SHIFT;
2835 unsigned offset = from & (PAGE_SIZE-1);
2836 unsigned blocksize;
2837 sector_t iblock;
2838 unsigned length, pos;
2839 struct inode *inode = mapping->host;
2840 struct page *page;
2841 struct buffer_head *bh;
2842 int err;
2843
2844 blocksize = i_blocksize(inode);
2845 length = offset & (blocksize - 1);
2846
2847
2848 if (!length)
2849 return 0;
2850
2851 length = blocksize - length;
2852 iblock = (sector_t)index << (PAGE_SHIFT - inode->i_blkbits);
2853
2854 page = grab_cache_page(mapping, index);
2855 err = -ENOMEM;
2856 if (!page)
2857 goto out;
2858
2859 if (!page_has_buffers(page))
2860 create_empty_buffers(page, blocksize, 0);
2861
2862
2863 bh = page_buffers(page);
2864 pos = blocksize;
2865 while (offset >= pos) {
2866 bh = bh->b_this_page;
2867 iblock++;
2868 pos += blocksize;
2869 }
2870
2871 err = 0;
2872 if (!buffer_mapped(bh)) {
2873 WARN_ON(bh->b_size != blocksize);
2874 err = get_block(inode, iblock, bh, 0);
2875 if (err)
2876 goto unlock;
2877
2878 if (!buffer_mapped(bh))
2879 goto unlock;
2880 }
2881
2882
2883 if (PageUptodate(page))
2884 set_buffer_uptodate(bh);
2885
2886 if (!buffer_uptodate(bh) && !buffer_delay(bh) && !buffer_unwritten(bh)) {
2887 err = -EIO;
2888 ll_rw_block(REQ_OP_READ, 0, 1, &bh);
2889 wait_on_buffer(bh);
2890
		if (!buffer_uptodate(bh))
			goto unlock;
	}

	zero_user(page, offset, length);
	mark_buffer_dirty(bh);
	err = 0;

unlock:
	unlock_page(page);
	put_page(page);
out:
	return err;
}
EXPORT_SYMBOL(block_truncate_page);

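/*
 * The generic ->writepage function for buffer-backed address_spaces
 */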
int block_write_full_page(struct page *page, get_block_t *get_block,
			struct writeback_control *wbc)
{
	struct inode * const inode = page->mapping->host;
	loff_t i_size = i_size_read(inode);
	const pgoff_t end_index = i_size >> PAGE_SHIFT;
	unsigned offset;

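	/* Is the page fully inside i_size? */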
	if (page->index < end_index)
		return __block_write_full_page(inode, page, get_block, wbc,
					       end_buffer_async_write);

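	/* Is the page fully outside i_size? (truncate in progress) */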
	offset = i_size & (PAGE_SIZE-1);
	if (page->index >= end_index+1 || !offset) {
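		/*
		 * The page may have dirty, unmapped buffers.  For example,
		 * they may have been added in ext3_writepage().  Make them
		 * freeable here, so the page does not leak.
		 */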
		do_invalidatepage(page, 0, PAGE_SIZE);
		unlock_page(page);
		return 0;
	}

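	/*
	 * The page straddles i_size.  It must be zeroed out on each and every
	 * writepage invocation because it may be mmapped.  "A file is mapped
	 * in multiples of the page size.  For a file that is not a multiple of
	 * the page size, the remaining memory is zeroed when mapped, and
	 * writes to that region are not written out to the file."
	 */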
	zero_user_segment(page, offset, PAGE_SIZE);
	return __block_write_full_page(inode, page, get_block, wbc,
				       end_buffer_async_write);
}
EXPORT_SYMBOL(block_write_full_page);

sector_t generic_block_bmap(struct address_space *mapping, sector_t block,
			    get_block_t *get_block)
{
	struct inode *inode = mapping->host;
	struct buffer_head tmp = {
		.b_size = i_blocksize(inode),
	};

	get_block(inode, block, &tmp, 0);
	return tmp.b_blocknr;
}
EXPORT_SYMBOL(generic_block_bmap);

static void end_bio_bh_io_sync(struct bio *bio)
{
	struct buffer_head *bh = bio->bi_private;

	if (unlikely(bio_flagged(bio, BIO_QUIET)))
		set_bit(BH_Quiet, &bh->b_state);

	bh->b_end_io(bh, !bio->bi_status);
	bio_put(bio);
}

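/*
 * This allows us to do IO even on the odd last sectors
 * of a device, even if the block size is some multiple
 * of the physical sector size.
 *
 * We'll just truncate the bio to the size of the device,
 * and clear the end of the last page as appropriate.
 */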
void guard_bio_eod(int op, struct bio *bio)
{
	sector_t maxsector;
	struct bio_vec *bvec = bio_last_bvec_all(bio);
	unsigned truncated_bytes;
	struct hd_struct *part;

	rcu_read_lock();
	part = __disk_get_part(bio->bi_disk, bio->bi_partno);
	if (part)
		maxsector = part_nr_sects_read(part);
	else
		maxsector = get_capacity(bio->bi_disk);
	rcu_read_unlock();

	if (!maxsector)
		return;

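	/*
	 * If the *whole* IO is past the end of the device,
	 * let it through, and the IO layer will turn it into
	 * an EIO.
	 */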
	if (unlikely(bio->bi_iter.bi_sector >= maxsector))
		return;

	maxsector -= bio->bi_iter.bi_sector;
	if (likely((bio->bi_iter.bi_size >> 9) <= maxsector))
		return;

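	/* Uhhuh. We've got a bio that straddles the device size! */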
	truncated_bytes = bio->bi_iter.bi_size - (maxsector << 9);

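	/* Truncate the bio.. */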
	bio->bi_iter.bi_size -= truncated_bytes;
	bvec->bv_len -= truncated_bytes;

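	/* ..and clear the end of the buffer for reads */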
	if (op == REQ_OP_READ) {
		zero_user(bvec->bv_page, bvec->bv_offset + bvec->bv_len,
				truncated_bytes);
	}
}

static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh,
			 enum rw_hint write_hint, struct writeback_control *wbc)
{
	struct bio *bio;

	BUG_ON(!buffer_locked(bh));
	BUG_ON(!buffer_mapped(bh));
	BUG_ON(!bh->b_end_io);
	BUG_ON(buffer_delay(bh));
	BUG_ON(buffer_unwritten(bh));

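	/*
	 * Only clear out a write error when rewriting
	 */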
	if (test_set_buffer_req(bh) && (op == REQ_OP_WRITE))
		clear_buffer_write_io_error(bh);

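	/*
	 * from here on down, it's all bio -- do the initial mapping,
	 * submit_bio -> generic_make_request may further map this
	 * bio around
	 */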
	bio = bio_alloc(GFP_NOIO, 1);

	if (wbc) {
		wbc_init_bio(wbc, bio);
		wbc_account_io(wbc, bh->b_page, bh->b_size);
	}

	bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9);
	bio_set_dev(bio, bh->b_bdev);
	bio->bi_write_hint = write_hint;

	bio_add_page(bio, bh->b_page, bh->b_size, bh_offset(bh));
	BUG_ON(bio->bi_iter.bi_size != bh->b_size);

	bio->bi_end_io = end_bio_bh_io_sync;
	bio->bi_private = bh;

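	/* Take care of bh's that straddle the end of the device */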
	guard_bio_eod(op, bio);

	if (buffer_meta(bh))
		op_flags |= REQ_META;
	if (buffer_prio(bh))
		op_flags |= REQ_PRIO;
	bio_set_op_attrs(bio, op, op_flags);

	submit_bio(bio);
	return 0;
}

int submit_bh(int op, int op_flags, struct buffer_head *bh)
{
	return submit_bh_wbc(op, op_flags, bh, 0, NULL);
}
EXPORT_SYMBOL(submit_bh);

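/**
 * ll_rw_block: low-level access to block devices (DEPRECATED)
 * @op: whether to %READ or %WRITE
 * @op_flags: req_flag_bits
 * @nr: number of &struct buffer_heads in the array
 * @bhs: array of pointers to &struct buffer_head
 *
 * ll_rw_block() takes an array of pointers to &struct buffer_heads, and
 * requests an I/O operation on them, either a %REQ_OP_READ or a %REQ_OP_WRITE.
 * @op_flags contains flags modifying the I/O behavior, such as %REQ_SYNC.
 *
 * This function drops any buffer that it cannot get a lock on (with the
 * BH_Lock state bit), any buffer that appears to be clean when doing a write
 * request, and any buffer that appears to be up-to-date when doing read
 * request.  Further it marks as clean buffers that are processed for
 * writing (the buffer cache won't assume that they are actually clean
 * until the buffer gets unlocked).
 *
 * ll_rw_block sets b_end_io to simple completion handler that marks
 * the buffer up-to-date (if appropriate), unlocks the buffer and wakes
 * any waiters.
 *
 * All of the buffers must be for the same device, and must also be a
 * multiple of the current approved size for the device.
 */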
void ll_rw_block(int op, int op_flags, int nr, struct buffer_head *bhs[])
{
	int i;

	for (i = 0; i < nr; i++) {
		struct buffer_head *bh = bhs[i];

		if (!trylock_buffer(bh))
			continue;
		if (op == WRITE) {
			if (test_clear_buffer_dirty(bh)) {
				bh->b_end_io = end_buffer_write_sync;
				get_bh(bh);
				submit_bh(op, op_flags, bh);
				continue;
			}
		} else {
			if (!buffer_uptodate(bh)) {
				bh->b_end_io = end_buffer_read_sync;
				get_bh(bh);
				submit_bh(op, op_flags, bh);
				continue;
			}
		}
		unlock_buffer(bh);
	}
}
EXPORT_SYMBOL(ll_rw_block);

void write_dirty_buffer(struct buffer_head *bh, int op_flags)
{
	lock_buffer(bh);
	if (!test_clear_buffer_dirty(bh)) {
		unlock_buffer(bh);
		return;
	}
	bh->b_end_io = end_buffer_write_sync;
	get_bh(bh);
	submit_bh(REQ_OP_WRITE, op_flags, bh);
}
EXPORT_SYMBOL(write_dirty_buffer);

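/*
 * For a data-integrity writeout, we need to wait upon any in-progress I/O
 * and then start new I/O and then wait upon it.  The caller must have a ref on
 * the buffer_head.
 */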
int __sync_dirty_buffer(struct buffer_head *bh, int op_flags)
{
	int ret = 0;

	WARN_ON(atomic_read(&bh->b_count) < 1);
	lock_buffer(bh);
	if (test_clear_buffer_dirty(bh)) {
		get_bh(bh);
		bh->b_end_io = end_buffer_write_sync;
		ret = submit_bh(REQ_OP_WRITE, op_flags, bh);
		wait_on_buffer(bh);
		if (!ret && !buffer_uptodate(bh))
			ret = -EIO;
	} else {
		unlock_buffer(bh);
	}
	return ret;
}
EXPORT_SYMBOL(__sync_dirty_buffer);

int sync_dirty_buffer(struct buffer_head *bh)
{
	return __sync_dirty_buffer(bh, REQ_SYNC);
}
EXPORT_SYMBOL(sync_dirty_buffer);

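/*
 * try_to_free_buffers() checks if all the buffers on this particular page
 * are unused, and releases them if so.
 *
 * Exclusion against try_to_free_buffers may be obtained by either
 * locking the page or by holding its mapping's private_lock.
 *
 * If the page is dirty but all the buffers are clean then we need to
 * be sure to mark the page clean as well.  This is because the page
 * may be against a block device, and a later reattachment of buffers
 * to a dirty page will set *all* buffers dirty.  Which would corrupt
 * filesystem data on the same device.
 *
 * The same applies to regular filesystem pages: if all the buffers are
 * clean then we set the page clean and proceed.  To do that, we require
 * total exclusion from __set_page_dirty_buffers().  That is obtained with
 * private_lock.
 *
 * try_to_free_buffers() is non-blocking.
 */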
static inline int buffer_busy(struct buffer_head *bh)
{
	return atomic_read(&bh->b_count) |
		(bh->b_state & ((1 << BH_Dirty) | (1 << BH_Lock)));
}

static int
drop_buffers(struct page *page, struct buffer_head **buffers_to_free)
{
	struct buffer_head *head = page_buffers(page);
	struct buffer_head *bh;

	bh = head;
	do {
		if (buffer_busy(bh))
			goto failed;
		bh = bh->b_this_page;
	} while (bh != head);

	do {
		struct buffer_head *next = bh->b_this_page;

		if (bh->b_assoc_map)
			__remove_assoc_queue(bh);
		bh = next;
	} while (bh != head);
	*buffers_to_free = head;
	__clear_page_buffers(page);
	return 1;
failed:
	return 0;
}

int try_to_free_buffers(struct page *page)
{
	struct address_space * const mapping = page->mapping;
	struct buffer_head *buffers_to_free = NULL;
	int ret = 0;

	BUG_ON(!PageLocked(page));
	if (PageWriteback(page))
		return 0;

	if (mapping == NULL) {
		ret = drop_buffers(page, &buffers_to_free);
		goto out;
	}

	spin_lock(&mapping->private_lock);
	ret = drop_buffers(page, &buffers_to_free);
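	/*
	 * If the filesystem writes its buffers by hand (eg ext3)
	 * then we can have clean buffers against a dirty page.  We
	 * clean the page here; otherwise the VM will never notice
	 * that the filesystem did any IO at all.
	 *
	 * Also, during truncate, discard_buffer will have marked all
	 * the page's buffers clean.  We discover that here and clean
	 * the page also.
	 *
	 * private_lock must be held over this entire operation in order
	 * to synchronise against __set_page_dirty_buffers and prevent the
	 * dirty bit from being lost.
	 */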
	if (ret)
		cancel_dirty_page(page);
	spin_unlock(&mapping->private_lock);
out:
	if (buffers_to_free) {
		struct buffer_head *bh = buffers_to_free;

		do {
			struct buffer_head *next = bh->b_this_page;
			free_buffer_head(bh);
			bh = next;
		} while (bh != buffers_to_free);
	}
	return ret;
}
EXPORT_SYMBOL(try_to_free_buffers);

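/*
 * There are no bdflush tunables left.  But distributions are
 * still running obsolete flush daemons, so we terminate them here.
 *
 * Use of bdflush() is deprecated and will be removed in a future kernel.
 * The `flush-X' kernel threads fully replace bdflush daemons and this call.
 */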
SYSCALL_DEFINE2(bdflush, int, func, long, data)
{
	static int msg_count;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	if (msg_count < 5) {
		msg_count++;
		printk(KERN_INFO
			"warning: process `%s' used the obsolete bdflush"
			" system call\n", current->comm);
		printk(KERN_INFO "Fix your initscripts?\n");
	}

	if (func == 1)
		do_exit(0);
	return 0;
}

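/*
 * Buffer-head allocation
 */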
static struct kmem_cache *bh_cachep __read_mostly;

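/*
 * Once the number of bh's in the machine exceeds this level, we start
 * stripping them in writeback.
 */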
static unsigned long max_buffer_heads;

int buffer_heads_over_limit;

struct bh_accounting {
	int nr;			/* Number of live bh's */
	int ratelimit;		/* Limit cacheline bouncing */
};

static DEFINE_PER_CPU(struct bh_accounting, bh_accounting) = {0, 0};

static void recalc_bh_state(void)
{
	int i;
	int tot = 0;

	if (__this_cpu_inc_return(bh_accounting.ratelimit) - 1 < 4096)
		return;
	__this_cpu_write(bh_accounting.ratelimit, 0);
	for_each_online_cpu(i)
		tot += per_cpu(bh_accounting, i).nr;
	buffer_heads_over_limit = (tot > max_buffer_heads);
}

struct buffer_head *alloc_buffer_head(gfp_t gfp_flags)
{
	struct buffer_head *ret = kmem_cache_zalloc(bh_cachep, gfp_flags);
	if (ret) {
		INIT_LIST_HEAD(&ret->b_assoc_buffers);
		preempt_disable();
		__this_cpu_inc(bh_accounting.nr);
		recalc_bh_state();
		preempt_enable();
	}
	return ret;
}
EXPORT_SYMBOL(alloc_buffer_head);

void free_buffer_head(struct buffer_head *bh)
{
	BUG_ON(!list_empty(&bh->b_assoc_buffers));
	kmem_cache_free(bh_cachep, bh);
	preempt_disable();
	__this_cpu_dec(bh_accounting.nr);
	recalc_bh_state();
	preempt_enable();
}
EXPORT_SYMBOL(free_buffer_head);

static int buffer_exit_cpu_dead(unsigned int cpu)
{
	int i;
	struct bh_lru *b = &per_cpu(bh_lrus, cpu);

	for (i = 0; i < BH_LRU_SIZE; i++) {
		brelse(b->bhs[i]);
		b->bhs[i] = NULL;
	}
	this_cpu_add(bh_accounting.nr, per_cpu(bh_accounting, cpu).nr);
	per_cpu(bh_accounting, cpu).nr = 0;
	return 0;
}

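/**
 * bh_uptodate_or_lock - Test whether the buffer is uptodate
 * @bh: struct buffer_head
 *
 * Return true if the buffer is up-to-date and false,
 * with the buffer locked, if not.
 */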
int bh_uptodate_or_lock(struct buffer_head *bh)
{
	if (!buffer_uptodate(bh)) {
		lock_buffer(bh);
		if (!buffer_uptodate(bh))
			return 0;
		unlock_buffer(bh);
	}
	return 1;
}
EXPORT_SYMBOL(bh_uptodate_or_lock);

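/**
 * bh_submit_read - Submit a locked buffer for reading
 * @bh: struct buffer_head
 *
 * Returns zero on success and -EIO on error.
 */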
int bh_submit_read(struct buffer_head *bh)
{
	BUG_ON(!buffer_locked(bh));

	if (buffer_uptodate(bh)) {
		unlock_buffer(bh);
		return 0;
	}

	get_bh(bh);
	bh->b_end_io = end_buffer_read_sync;
	submit_bh(REQ_OP_READ, 0, bh);
	wait_on_buffer(bh);
	if (buffer_uptodate(bh))
		return 0;
	return -EIO;
}
EXPORT_SYMBOL(bh_submit_read);

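/*
 * Seek for SEEK_DATA / SEEK_HOLE within @page, starting at @lastoff.
 *
 * Returns the offset within the file on success, and -ENOENT otherwise.
 */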
static loff_t
page_seek_hole_data(struct page *page, loff_t lastoff, int whence)
{
	loff_t offset = page_offset(page);
	struct buffer_head *bh, *head;
	bool seek_data = whence == SEEK_DATA;

	if (lastoff < offset)
		lastoff = offset;

	bh = head = page_buffers(page);
	do {
		offset += bh->b_size;
		if (lastoff >= offset)
			continue;

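		/*
		 * Unwritten extents that have data in the page cache covering
		 * them can be identified by the BH_Unwritten state flag.
		 * Pages with multiple buffers might have a mix of holes, data
		 * and unwritten extents - any buffer with valid data in it
		 * should have BH_Uptodate flag set on it.
		 */
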
		if ((buffer_unwritten(bh) || buffer_uptodate(bh)) == seek_data)
			return lastoff;

		lastoff = offset;
	} while ((bh = bh->b_this_page) != head);
	return -ENOENT;
}

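/*
 * Seek for SEEK_DATA / SEEK_HOLE in the page cache.
 *
 * Within unwritten extents, the page cache determines which parts are holes
 * and which are data: unwritten and uptodate buffer heads count as data;
 * everything else counts as a hole.
 *
 * Returns the resulting offset on success, and -ENOENT otherwise.
 */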
loff_t
page_cache_seek_hole_data(struct inode *inode, loff_t offset, loff_t length,
			  int whence)
{
	pgoff_t index = offset >> PAGE_SHIFT;
	pgoff_t end = DIV_ROUND_UP(offset + length, PAGE_SIZE);
	loff_t lastoff = offset;
	struct pagevec pvec;

	if (length <= 0)
		return -ENOENT;

	pagevec_init(&pvec);

	do {
		unsigned nr_pages, i;

		nr_pages = pagevec_lookup_range(&pvec, inode->i_mapping, &index,
						end - 1);
		if (nr_pages == 0)
			break;

		for (i = 0; i < nr_pages; i++) {
			struct page *page = pvec.pages[i];

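			/*
			 * At this point, the page may be truncated or
			 * invalidated (changing page->mapping to NULL), or
			 * even swizzled back from swapper_space to tmpfs file
			 * mapping.  However, page->index will not change
			 * because we have a reference on the page.
			 *
			 * If current page offset is beyond where we've ended,
			 * we've found a hole.
			 */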
			if (whence == SEEK_HOLE &&
			    lastoff < page_offset(page))
				goto check_range;

			lock_page(page);
			if (likely(page->mapping == inode->i_mapping) &&
			    page_has_buffers(page)) {
				lastoff = page_seek_hole_data(page, lastoff, whence);
				if (lastoff >= 0) {
					unlock_page(page);
					goto check_range;
				}
			}
			unlock_page(page);
			lastoff = page_offset(page) + PAGE_SIZE;
		}
		pagevec_release(&pvec);
	} while (index < end);

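	/* When no page at lastoff and we are not done, we found a hole. */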
	if (whence != SEEK_HOLE)
		goto not_found;

check_range:
	if (lastoff < offset + length)
		goto out;
not_found:
	lastoff = -ENOENT;
out:
	pagevec_release(&pvec);
	return lastoff;
}

void __init buffer_init(void)
{
	unsigned long nrpages;
	int ret;

	bh_cachep = kmem_cache_create("buffer_head",
			sizeof(struct buffer_head), 0,
				(SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|
				SLAB_MEM_SPREAD),
				NULL);

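	/*
	 * Limit the bh occupancy to 10% of ZONE_NORMAL
	 */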
	nrpages = (nr_free_buffer_pages() * 10) / 100;
	max_buffer_heads = nrpages * (PAGE_SIZE / sizeof(struct buffer_head));
	ret = cpuhp_setup_state_nocalls(CPUHP_FS_BUFF_DEAD, "fs/buffer:dead",
					NULL, buffer_exit_cpu_dead);
	WARN_ON(ret < 0);
}