// SPDX-License-Identifier: GPL-2.0
/*
 *  linux/fs/buffer.c
 *
 *  Copyright (C) 1991, 1992, 2002  Linus Torvalds
 */

/*
 * Start bdflush() with kernel_thread not syscall - Paul Gortmaker, 12/95
 *
 * Removed a lot of unnecessary code and simplified things now that
 * the buffer cache isn't our primary cache - Andrew Tridgell 12/96
 *
 * Speed up hash, lru, and free list operations.  Use gfp() for allocating
 * hash table, use SLAB cache for buffer heads. SMP threading.  -DaveM
 *
 * Added 32k buffer block sizes - these are required for older ARM systems. - RMK
 *
 * async buffer flushing, 1999 Andrew Morton
 */
#include <linux/kernel.h>
#include <linux/sched/signal.h>
#include <linux/syscalls.h>
#include <linux/fs.h>
#include <linux/iomap.h>
#include <linux/mm.h>
#include <linux/percpu.h>
#include <linux/slab.h>
#include <linux/capability.h>
#include <linux/blkdev.h>
#include <linux/file.h>
#include <linux/quotaops.h>
#include <linux/highmem.h>
#include <linux/export.h>
#include <linux/backing-dev.h>
#include <linux/writeback.h>
#include <linux/hash.h>
#include <linux/suspend.h>
#include <linux/buffer_head.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/bio.h>
#include <linux/cpu.h>
#include <linux/bitops.h>
#include <linux/mpage.h>
#include <linux/bit_spinlock.h>
#include <linux/pagevec.h>
#include <linux/sched/mm.h>
#include <trace/events/block.h>

static int fsync_buffers_list(spinlock_t *lock, struct list_head *list);
static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh,
			 enum rw_hint hint, struct writeback_control *wbc);

#define BH_ENTRY(list) list_entry((list), struct buffer_head, b_assoc_buffers)

inline void touch_buffer(struct buffer_head *bh)
{
	trace_block_touch_buffer(bh);
	mark_page_accessed(bh->b_page);
}
EXPORT_SYMBOL(touch_buffer);

void __lock_buffer(struct buffer_head *bh)
{
	wait_on_bit_lock_io(&bh->b_state, BH_Lock, TASK_UNINTERRUPTIBLE);
}
EXPORT_SYMBOL(__lock_buffer);

void unlock_buffer(struct buffer_head *bh)
{
	clear_bit_unlock(BH_Lock, &bh->b_state);
	smp_mb__after_atomic();
	wake_up_bit(&bh->b_state, BH_Lock);
}
EXPORT_SYMBOL(unlock_buffer);

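/*
 * Returns if the page has dirty or writeback buffers. If all the buffers
 * are unlocked and clean then the PageDirty information is stale. If
 * any of the pages are locked, it is assumed they are locked for IO.
 */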
void buffer_check_dirty_writeback(struct page *page,
				     bool *dirty, bool *writeback)
{
	struct buffer_head *head, *bh;
	*dirty = false;
	*writeback = false;

	BUG_ON(!PageLocked(page));

	if (!page_has_buffers(page))
		return;

	if (PageWriteback(page))
		*writeback = true;

	head = page_buffers(page);
	bh = head;
	do {
		if (buffer_locked(bh))
			*writeback = true;

		if (buffer_dirty(bh))
			*dirty = true;

		bh = bh->b_this_page;
	} while (bh != head);
}
EXPORT_SYMBOL(buffer_check_dirty_writeback);

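/*
 * Block until a buffer comes unlocked.  This doesn't stop it
 * from becoming locked again - you have to lock it yourself
 * if you want to preserve its state.
 */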
void __wait_on_buffer(struct buffer_head * bh)
{
	wait_on_bit_io(&bh->b_state, BH_Lock, TASK_UNINTERRUPTIBLE);
}
EXPORT_SYMBOL(__wait_on_buffer);

static void
__clear_page_buffers(struct page *page)
{
	ClearPagePrivate(page);
	set_page_private(page, 0);
	put_page(page);
}

static void buffer_io_error(struct buffer_head *bh, char *msg)
{
	if (!test_bit(BH_Quiet, &bh->b_state))
		printk_ratelimited(KERN_ERR
			"Buffer I/O error on dev %pg, logical block %llu%s\n",
			bh->b_bdev, (unsigned long long)bh->b_blocknr, msg);
}

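/*
 * End-of-IO handler helper function which does not touch the bh after
 * unlocking it.
 * Note: unlock_buffer() sort-of does touch the bh after unlocking it, but
 * a race there is benign: unlock_buffer() only uses the bh's address for
 * hashing after unlocking the buffer, so it doesn't actually touch the bh
 * itself.
 */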
static void __end_buffer_read_notouch(struct buffer_head *bh, int uptodate)
{
	if (uptodate) {
		set_buffer_uptodate(bh);
	} else {
		/* This happens, due to failed read-ahead attempts. */
		clear_buffer_uptodate(bh);
	}
	unlock_buffer(bh);
}

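/*
 * Default synchronous end-of-IO handler..  Just mark it up-to-date and
 * unlock the buffer. This is what ll_rw_block uses too.
 */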
void end_buffer_read_sync(struct buffer_head *bh, int uptodate)
{
	__end_buffer_read_notouch(bh, uptodate);
	put_bh(bh);
}
EXPORT_SYMBOL(end_buffer_read_sync);

void end_buffer_write_sync(struct buffer_head *bh, int uptodate)
{
	if (uptodate) {
		set_buffer_uptodate(bh);
	} else {
		buffer_io_error(bh, ", lost sync page write");
		mark_buffer_write_io_error(bh);
		clear_buffer_uptodate(bh);
	}
	unlock_buffer(bh);
	put_bh(bh);
}
EXPORT_SYMBOL(end_buffer_write_sync);

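/*
 * Various filesystems appear to want __find_get_block to be non-blocking.
 * But it's the page lock which protects the buffers.  To get around this,
 * we get exclusion from try_to_free_buffers with the blockdev mapping's
 * private_lock.
 *
 * Hack idea: for the blockdev mapping, private_lock contention
 * may be quite high.  This code could TryLock the page, and if that
 * succeeds, there is no need to take private_lock.
 */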
static struct buffer_head *
__find_get_block_slow(struct block_device *bdev, sector_t block)
{
	struct inode *bd_inode = bdev->bd_inode;
	struct address_space *bd_mapping = bd_inode->i_mapping;
	struct buffer_head *ret = NULL;
	pgoff_t index;
	struct buffer_head *bh;
	struct buffer_head *head;
	struct page *page;
	int all_mapped = 1;
	static DEFINE_RATELIMIT_STATE(last_warned, HZ, 1);

	index = block >> (PAGE_SHIFT - bd_inode->i_blkbits);
	page = find_get_page_flags(bd_mapping, index, FGP_ACCESSED);
	if (!page)
		goto out;

	spin_lock(&bd_mapping->private_lock);
	if (!page_has_buffers(page))
		goto out_unlock;
	head = page_buffers(page);
	bh = head;
	do {
		if (!buffer_mapped(bh))
			all_mapped = 0;
		else if (bh->b_blocknr == block) {
			ret = bh;
			get_bh(bh);
			goto out_unlock;
		}
		bh = bh->b_this_page;
	} while (bh != head);

	/* we might be here because some of the buffers on this page are
	 * not mapped.  This is due to various races between
	 * file io on the block device and getblk.  It gets dealt with
	 * elsewhere, don't buffer_error if we had some unmapped buffers
	 */
	ratelimit_set_flags(&last_warned, RATELIMIT_MSG_ON_RELEASE);
	if (all_mapped && __ratelimit(&last_warned)) {
		printk("__find_get_block_slow() failed. block=%llu, "
		       "b_blocknr=%llu, b_state=0x%08lx, b_size=%zu, "
		       "device %pg blocksize: %d\n",
		       (unsigned long long)block,
		       (unsigned long long)bh->b_blocknr,
		       bh->b_state, bh->b_size, bdev,
		       1 << bd_inode->i_blkbits);
	}
out_unlock:
	spin_unlock(&bd_mapping->private_lock);
	put_page(page);
out:
	return ret;
}

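/*
 * I/O completion handler for block_read_full_page() - pages
 * which come unlocked at the end of I/O.
 */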
static void end_buffer_async_read(struct buffer_head *bh, int uptodate)
{
	unsigned long flags;
	struct buffer_head *first;
	struct buffer_head *tmp;
	struct page *page;
	int page_uptodate = 1;

	BUG_ON(!buffer_async_read(bh));

	page = bh->b_page;
	if (uptodate) {
		set_buffer_uptodate(bh);
	} else {
		clear_buffer_uptodate(bh);
		buffer_io_error(bh, ", async page read");
		SetPageError(page);
	}

	/*
	 * Be _very_ careful from here on. Bad things can happen if
	 * two buffer heads end IO at almost the same time and both
	 * decide that the page is now completely done.
	 */
	first = page_buffers(page);
	local_irq_save(flags);
	bit_spin_lock(BH_Uptodate_Lock, &first->b_state);
	clear_buffer_async_read(bh);
	unlock_buffer(bh);
	tmp = bh;
	do {
		if (!buffer_uptodate(tmp))
			page_uptodate = 0;
		if (buffer_async_read(tmp)) {
			BUG_ON(!buffer_locked(tmp));
			goto still_busy;
		}
		tmp = tmp->b_this_page;
	} while (tmp != bh);
	bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
	local_irq_restore(flags);

	/*
	 * If none of the buffers had errors and they are all
	 * uptodate then we can set the page uptodate.
	 */
	if (page_uptodate && !PageError(page))
		SetPageUptodate(page);
	unlock_page(page);
	return;

still_busy:
	bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
	local_irq_restore(flags);
	return;
}

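/*
 * Completion handler for block_write_full_page() - pages which are unlocked
 * during I/O, and which have PageWriteback cleared upon I/O completion.
 */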
void end_buffer_async_write(struct buffer_head *bh, int uptodate)
{
	unsigned long flags;
	struct buffer_head *first;
	struct buffer_head *tmp;
	struct page *page;

	BUG_ON(!buffer_async_write(bh));

	page = bh->b_page;
	if (uptodate) {
		set_buffer_uptodate(bh);
	} else {
		buffer_io_error(bh, ", lost async page write");
		mark_buffer_write_io_error(bh);
		clear_buffer_uptodate(bh);
		SetPageError(page);
	}

	first = page_buffers(page);
	local_irq_save(flags);
	bit_spin_lock(BH_Uptodate_Lock, &first->b_state);

	clear_buffer_async_write(bh);
	unlock_buffer(bh);
	tmp = bh->b_this_page;
	while (tmp != bh) {
		if (buffer_async_write(tmp)) {
			BUG_ON(!buffer_locked(tmp));
			goto still_busy;
		}
		tmp = tmp->b_this_page;
	}
	bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
	local_irq_restore(flags);
	end_page_writeback(page);
	return;

still_busy:
	bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
	local_irq_restore(flags);
	return;
}
EXPORT_SYMBOL(end_buffer_async_write);

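/*
 * If a page's buffers are under async readin (end_buffer_async_read
 * completion) then there is a possibility that another thread of
 * control could lock one of the buffers after it has completed
 * but while some of the other buffers have not completed.  This
 * locked buffer would confuse end_buffer_async_read() into not unlocking
 * the page.  So the absence of BH_Async_Read tells end_buffer_async_read()
 * that this buffer is not under async I/O.
 *
 * The page comes unlocked when it has no locked buffer_async buffers
 * left.
 *
 * PageLocked prevents anyone starting new async I/O reads any of
 * the buffers.
 *
 * PageWriteback is used to prevent simultaneous writeout of the same
 * page.
 *
 * PageLocked prevents anyone from starting writeback of a page which is
 * under read I/O (PageWriteback is only ever set against a locked page).
 */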
static void mark_buffer_async_read(struct buffer_head *bh)
{
	bh->b_end_io = end_buffer_async_read;
	set_buffer_async_read(bh);
}

static void mark_buffer_async_write_endio(struct buffer_head *bh,
					  bh_end_io_t *handler)
{
	bh->b_end_io = handler;
	set_buffer_async_write(bh);
}

void mark_buffer_async_write(struct buffer_head *bh)
{
	mark_buffer_async_write_endio(bh, end_buffer_async_write);
}
EXPORT_SYMBOL(mark_buffer_async_write);

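/*
 * fs/buffer.c contains helper functions for buffer-backed address space's
 * fsync functions.  A common requirement for buffer-based filesystems is
 * that certain data from the backing blockdev needs to be written out for
 * a successful fsync().  For example, ext2 indirect blocks need to be
 * written back and waited upon before fsync() returns.
 *
 * The functions mark_buffer_dirty_inode(), fsync_buffers_list(),
 * inode_has_buffers() and invalidate_inode_buffers() are provided for the
 * management of a list of dependent buffers at ->i_mapping->private_list.
 *
 * Locking is a little subtle: try_to_free_buffers() will remove buffers
 * from their controlling inode's queue when they are being freed.  But
 * try_to_free_buffers() will be operating against the *blockdev* mapping
 * at the time, not against the S_ISREG file which depends on those buffers.
 * So the locking for private_list is via the private_lock in the address_space
 * which backs the buffers.  Which is different from the address_space
 * against which the buffers are listed.  So for a particular address_space,
 * mapping->private_lock does *not* protect mapping->private_list!  In fact,
 * mapping->private_list will always be protected by the backing blockdev's
 * ->private_lock.
 *
 * Which introduces a requirement: all buffers on an address_space's
 * ->private_list must be from the same address_space: the blockdev's.
 */

/*
 * The buffer's backing address_space's private_lock must be held
 */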
static void __remove_assoc_queue(struct buffer_head *bh)
{
	list_del_init(&bh->b_assoc_buffers);
	WARN_ON(!bh->b_assoc_map);
	bh->b_assoc_map = NULL;
}

int inode_has_buffers(struct inode *inode)
{
	return !list_empty(&inode->i_data.private_list);
}

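/*
 * osync is designed to support O_SYNC io.  It waits synchronously for
 * all already-submitted IO to complete, but does not queue any new
 * writes to the disk.
 *
 * To do O_SYNC writes, just queue the buffer writes with ll_rw_block as
 * you dirty the buffers, and then use osync_buffers_list to wait for
 * completion.  Otherwise their data will be left floating around in memory.
 */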
static int osync_buffers_list(spinlock_t *lock, struct list_head *list)
{
	struct buffer_head *bh;
	struct list_head *p;
	int err = 0;

	spin_lock(lock);
repeat:
	list_for_each_prev(p, list) {
		bh = BH_ENTRY(p);
		if (buffer_locked(bh)) {
			get_bh(bh);
			spin_unlock(lock);
			wait_on_buffer(bh);
			if (!buffer_uptodate(bh))
				err = -EIO;
			brelse(bh);
			spin_lock(lock);
			goto repeat;
		}
	}
	spin_unlock(lock);
	return err;
}

void emergency_thaw_bdev(struct super_block *sb)
{
	while (sb->s_bdev && !thaw_bdev(sb->s_bdev, sb))
		printk(KERN_WARNING "Emergency Thaw on %pg\n", sb->s_bdev);
}

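/**
 * sync_mapping_buffers - write out & wait upon a mapping's "associated" buffers
 * @mapping: the mapping which wants those buffers written
 *
 * Starts I/O against the buffers at mapping->private_list, and waits upon
 * that I/O.
 *
 * Basically, this is a convenience function for fsync().
 * @mapping is a file or directory which needs those buffers to be written for
 * a successful fsync().
 */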
int sync_mapping_buffers(struct address_space *mapping)
{
	struct address_space *buffer_mapping = mapping->private_data;

	if (buffer_mapping == NULL || list_empty(&mapping->private_list))
		return 0;

	return fsync_buffers_list(&buffer_mapping->private_lock,
					&mapping->private_list);
}
EXPORT_SYMBOL(sync_mapping_buffers);

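/*
 * Called when we've recently written block `bblock', and it is known that
 * `bblock' was for a buffer_boundary() buffer.  This means that the block at
 * `bblock + 1' is probably a dirty indirect block.  Hunt it down and, if it's
 * dirty, schedule it for IO.  So that indirects merge nicely with their data.
 */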
void write_boundary_block(struct block_device *bdev,
			sector_t bblock, unsigned blocksize)
{
	struct buffer_head *bh = __find_get_block(bdev, bblock + 1, blocksize);
	if (bh) {
		if (buffer_dirty(bh))
			ll_rw_block(REQ_OP_WRITE, 0, 1, &bh);
		put_bh(bh);
	}
}

void mark_buffer_dirty_inode(struct buffer_head *bh, struct inode *inode)
{
	struct address_space *mapping = inode->i_mapping;
	struct address_space *buffer_mapping = bh->b_page->mapping;

	mark_buffer_dirty(bh);
	if (!mapping->private_data) {
		mapping->private_data = buffer_mapping;
	} else {
		BUG_ON(mapping->private_data != buffer_mapping);
	}
	if (!bh->b_assoc_map) {
		spin_lock(&buffer_mapping->private_lock);
		list_move_tail(&bh->b_assoc_buffers,
				&mapping->private_list);
		bh->b_assoc_map = mapping;
		spin_unlock(&buffer_mapping->private_lock);
	}
}
EXPORT_SYMBOL(mark_buffer_dirty_inode);

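/*
 * Mark the page dirty, and set it dirty in the page cache, and mark the inode
 * dirty.
 *
 * If warn is true, then emit a warning if the page is not uptodate and has
 * not been truncated.
 *
 * The caller must hold lock_page_memcg().
 */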
void __set_page_dirty(struct page *page, struct address_space *mapping,
			     int warn)
{
	unsigned long flags;

	xa_lock_irqsave(&mapping->i_pages, flags);
	if (page->mapping) {	/* Race with truncate? */
		WARN_ON_ONCE(warn && !PageUptodate(page));
		account_page_dirtied(page, mapping);
		__xa_set_mark(&mapping->i_pages, page_index(page),
				PAGECACHE_TAG_DIRTY);
	}
	xa_unlock_irqrestore(&mapping->i_pages, flags);
}
EXPORT_SYMBOL_GPL(__set_page_dirty);

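/*
 * Add a page to the dirty page list.
 *
 * It is a sad fact of life that this function is called from several places
 * deeply under spinlocking.  It may not sleep.
 *
 * If the page has buffers, the uptodate buffers are set dirty, to preserve
 * dirty-state coherency between the page and the buffers.  If the page does
 * not have buffers then when they are later attached they will all be set
 * dirty.
 *
 * The buffers are dirtied before the page is dirtied.  There's a small race
 * window in which a writepage caller may see the page cleanness but not the
 * buffer dirtiness.  That's fine.  If this code were to set the page dirty
 * before the buffers, a concurrent writepage caller could clear the page dirty
 * bit, see a bunch of clean buffers and we'd end up with dirty buffers/clean
 * page on the dirty page list.
 *
 * We use private_lock to lock against try_to_free_buffers while using the
 * page's buffer list.  Also use this to protect against clean buffers being
 * added to the page after it was set dirty.
 */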
int __set_page_dirty_buffers(struct page *page)
{
	int newly_dirty;
	struct address_space *mapping = page_mapping(page);

	if (unlikely(!mapping))
		return !TestSetPageDirty(page);

	spin_lock(&mapping->private_lock);
	if (page_has_buffers(page)) {
		struct buffer_head *head = page_buffers(page);
		struct buffer_head *bh = head;

		do {
			set_buffer_dirty(bh);
			bh = bh->b_this_page;
		} while (bh != head);
	}
	/*
	 * Lock out page->mem_cgroup migration to keep PageDirty
	 * synchronized with per-memcg dirty page counters.
	 */
	lock_page_memcg(page);
	newly_dirty = !TestSetPageDirty(page);
	spin_unlock(&mapping->private_lock);

	if (newly_dirty)
		__set_page_dirty(page, mapping, 1);

	unlock_page_memcg(page);

	if (newly_dirty)
		__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);

	return newly_dirty;
}
EXPORT_SYMBOL(__set_page_dirty_buffers);

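/*
 * Write out and wait upon a list of buffers.
 *
 * We have conflicting pressures: we want to make sure that all
 * initially dirty buffers get waited on, but that any subsequently
 * dirtied buffers don't.  After all, we don't want fsync to last
 * forever if somebody is actively writing to the file.
 *
 * Do this in two main stages: first we copy dirty buffers to a
 * temporary inode list, queueing the writes as we go.  Then we clean
 * up, waiting for those writes to complete.
 *
 * During this second stage, any subsequent updates to the file may end
 * up refiling the buffer on the original inode's dirty list again, so
 * there is a chance we will end up with a buffer queued for write but
 * not yet completed on that list.  So, as a final cleanup we go through
 * the osync code to catch these locked, dirty buffers without requeuing
 * any newly dirty buffers for write.
 */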
static int fsync_buffers_list(spinlock_t *lock, struct list_head *list)
{
	struct buffer_head *bh;
	struct list_head tmp;
	struct address_space *mapping;
	int err = 0, err2;
	struct blk_plug plug;

	INIT_LIST_HEAD(&tmp);
	blk_start_plug(&plug);

	spin_lock(lock);
	while (!list_empty(list)) {
		bh = BH_ENTRY(list->next);
		mapping = bh->b_assoc_map;
		__remove_assoc_queue(bh);
		/* Avoid race with mark_buffer_dirty_inode() which does
		 * a lockless check and we rely on seeing the dirty bit */
		smp_mb();
		if (buffer_dirty(bh) || buffer_locked(bh)) {
			list_add(&bh->b_assoc_buffers, &tmp);
			bh->b_assoc_map = mapping;
			if (buffer_dirty(bh)) {
				get_bh(bh);
				spin_unlock(lock);
				/*
				 * Ensure any pending I/O completes so that
				 * write_dirty_buffer() actually writes the
				 * current contents - it is a noop if I/O is
				 * still in flight on potentially older
				 * contents.
				 */
				write_dirty_buffer(bh, REQ_SYNC);

				/*
				 * Kick off IO for the previous mapping. Note
				 * that we will not run the very last mapping,
				 * wait_on_buffer() will do that for us
				 * through sync_buffer().
				 */
				brelse(bh);
				spin_lock(lock);
			}
		}
	}

	spin_unlock(lock);
	blk_finish_plug(&plug);
	spin_lock(lock);

	while (!list_empty(&tmp)) {
		bh = BH_ENTRY(tmp.prev);
		get_bh(bh);
		mapping = bh->b_assoc_map;
		__remove_assoc_queue(bh);
		/* Avoid race with mark_buffer_dirty_inode() which does
		 * a lockless check and we rely on seeing the dirty bit */
		smp_mb();
		if (buffer_dirty(bh)) {
			list_add(&bh->b_assoc_buffers,
				 &mapping->private_list);
			bh->b_assoc_map = mapping;
		}
		spin_unlock(lock);
		wait_on_buffer(bh);
		if (!buffer_uptodate(bh))
			err = -EIO;
		brelse(bh);
		spin_lock(lock);
	}

	spin_unlock(lock);
	err2 = osync_buffers_list(lock, list);
	if (err)
		return err;
	else
		return err2;
}

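/*
 * Invalidate any and all dirty buffers on a given inode.  We are
 * probably unmounting the fs, but that doesn't mean we have already
 * done a sync().  Just drop the buffers from the inode list.
 *
 * NOTE: we take the inode's blockdev's mapping's private_lock.  Which
 * assumes that all the buffers are against the blockdev.  Not true
 * for reiserfs.
 */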
void invalidate_inode_buffers(struct inode *inode)
{
	if (inode_has_buffers(inode)) {
		struct address_space *mapping = &inode->i_data;
		struct list_head *list = &mapping->private_list;
		struct address_space *buffer_mapping = mapping->private_data;

		spin_lock(&buffer_mapping->private_lock);
		while (!list_empty(list))
			__remove_assoc_queue(BH_ENTRY(list->next));
		spin_unlock(&buffer_mapping->private_lock);
	}
}
EXPORT_SYMBOL(invalidate_inode_buffers);

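/*
 * Remove any clean buffers from the inode's buffer list.  This is called
 * when we're trying to free the inode itself.  Those buffers can pin it.
 *
 * Returns true if all buffers were removed.
 */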
int remove_inode_buffers(struct inode *inode)
{
	int ret = 1;

	if (inode_has_buffers(inode)) {
		struct address_space *mapping = &inode->i_data;
		struct list_head *list = &mapping->private_list;
		struct address_space *buffer_mapping = mapping->private_data;

		spin_lock(&buffer_mapping->private_lock);
		while (!list_empty(list)) {
			struct buffer_head *bh = BH_ENTRY(list->next);
			if (buffer_dirty(bh)) {
				ret = 0;
				break;
			}
			__remove_assoc_queue(bh);
		}
		spin_unlock(&buffer_mapping->private_lock);
	}
	return ret;
}

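/*
 * Create the appropriate buffers when given a page for data area and
 * the size of each buffer.. Use the bh->b_this_page linked list to
 * follow the buffers created.  Return NULL if unable to create more
 * buffers.
 *
 * The retry flag is used to differentiate async IO (paging, swapping)
 * which may not fail from ordinary buffer allocations.
 */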
struct buffer_head *alloc_page_buffers(struct page *page, unsigned long size,
		bool retry)
{
	struct buffer_head *bh, *head;
	gfp_t gfp = GFP_NOFS | __GFP_ACCOUNT;
	long offset;
	struct mem_cgroup *memcg;

	if (retry)
		gfp |= __GFP_NOFAIL;

	memcg = get_mem_cgroup_from_page(page);
	memalloc_use_memcg(memcg);

	head = NULL;
	offset = PAGE_SIZE;
	while ((offset -= size) >= 0) {
		bh = alloc_buffer_head(gfp);
		if (!bh)
			goto no_grow;

		bh->b_this_page = head;
		bh->b_blocknr = -1;
		head = bh;

		bh->b_size = size;

		/* Link the buffer to its page */
		set_bh_page(bh, page, offset);
	}
out:
	memalloc_unuse_memcg();
	mem_cgroup_put(memcg);
	return head;
/*
 * In case anything failed, we just free everything we got.
 */
no_grow:
	if (head) {
		do {
			bh = head;
			head = head->b_this_page;
			free_buffer_head(bh);
		} while (head);
	}

	goto out;
}
EXPORT_SYMBOL_GPL(alloc_page_buffers);

static inline void
link_dev_buffers(struct page *page, struct buffer_head *head)
{
	struct buffer_head *bh, *tail;

	bh = head;
	do {
		tail = bh;
		bh = bh->b_this_page;
	} while (bh);
	tail->b_this_page = head;
	attach_page_buffers(page, head);
}

static sector_t blkdev_max_block(struct block_device *bdev, unsigned int size)
{
	sector_t retval = ~((sector_t)0);
	loff_t sz = i_size_read(bdev->bd_inode);

	if (sz) {
		unsigned int sizebits = blksize_bits(size);
		retval = (sz >> sizebits);
	}
	return retval;
}

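/*
 * Initialise the state of a blockdev page's buffers.
 */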
static sector_t
init_page_buffers(struct page *page, struct block_device *bdev,
			sector_t block, int size)
{
	struct buffer_head *head = page_buffers(page);
	struct buffer_head *bh = head;
	int uptodate = PageUptodate(page);
	sector_t end_block = blkdev_max_block(I_BDEV(bdev->bd_inode), size);

	do {
		if (!buffer_mapped(bh)) {
			bh->b_end_io = NULL;
			bh->b_private = NULL;
			bh->b_bdev = bdev;
			bh->b_blocknr = block;
			if (uptodate)
				set_buffer_uptodate(bh);
			if (block < end_block)
				set_buffer_mapped(bh);
		}
		block++;
		bh = bh->b_this_page;
	} while (bh != head);

	/*
	 * Caller needs to validate requested block against end of device.
	 */
	return end_block;
}

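/*
 * Create the page-cache page that contains the requested block.
 *
 * This is used purely for blockdev mappings.
 */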
static int
grow_dev_page(struct block_device *bdev, sector_t block,
	      pgoff_t index, int size, int sizebits, gfp_t gfp)
{
	struct inode *inode = bdev->bd_inode;
	struct page *page;
	struct buffer_head *bh;
	sector_t end_block;
	int ret = 0;
	gfp_t gfp_mask;

	gfp_mask = mapping_gfp_constraint(inode->i_mapping, ~__GFP_FS) | gfp;

	/*
	 * XXX: __getblk_slow() can not really deal with failure and
	 * will endlessly loop on improvised global reclaim.  Prefer
	 * looping in the allocator rather than here, at least that
	 * code knows what it's doing.
	 */
	gfp_mask |= __GFP_NOFAIL;

	page = find_or_create_page(inode->i_mapping, index, gfp_mask);

	BUG_ON(!PageLocked(page));

	if (page_has_buffers(page)) {
		bh = page_buffers(page);
		if (bh->b_size == size) {
			end_block = init_page_buffers(page, bdev,
						(sector_t)index << sizebits,
						size);
			goto done;
		}
		if (!try_to_free_buffers(page))
			goto failed;
	}

	/*
	 * Allocate some buffers for this page
	 */
	bh = alloc_page_buffers(page, size, true);

	/*
	 * Link the page to the buffers and initialise them.  Take the
	 * lock to be atomic wrt __find_get_block(), which does not
	 * require that the buffer be mapped into pagecache.
	 */
	spin_lock(&inode->i_mapping->private_lock);
	link_dev_buffers(page, bh);
	end_block = init_page_buffers(page, bdev, (sector_t)index << sizebits,
			size);
	spin_unlock(&inode->i_mapping->private_lock);
done:
	ret = (block < end_block) ? 1 : -ENXIO;
failed:
	unlock_page(page);
	put_page(page);
	return ret;
}

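/*
 * Create buffers for the specified block device block's page.  If
 * that page was dirty, the buffers are set dirty also.
 */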
static int
grow_buffers(struct block_device *bdev, sector_t block, int size, gfp_t gfp)
{
	pgoff_t index;
	int sizebits;

	sizebits = -1;
	do {
		sizebits++;
	} while ((size << sizebits) < PAGE_SIZE);

	index = block >> sizebits;

	/*
	 * Check for a block which wants to lie outside our maximum possible
	 * pagecache index.  (this comparison is done using sector_t types).
	 */
	if (unlikely(index != block >> sizebits)) {
		printk(KERN_ERR "%s: requested out-of-range block %llu for "
			"device %pg\n",
			__func__, (unsigned long long)block,
			bdev);
		return -EIO;
	}

	/* Create a page with the proper size buffers.. */
	return grow_dev_page(bdev, block, index, size, sizebits, gfp);
}

static struct buffer_head *
__getblk_slow(struct block_device *bdev, sector_t block,
	     unsigned size, gfp_t gfp)
{
	/* Size must be multiple of hard sectorsize */
	if (unlikely(size & (bdev_logical_block_size(bdev)-1) ||
			(size < 512 || size > PAGE_SIZE))) {
		printk(KERN_ERR "getblk(): invalid block size %d requested\n",
					size);
		printk(KERN_ERR "logical block size: %d\n",
					bdev_logical_block_size(bdev));

		dump_stack();
		return NULL;
	}

	for (;;) {
		struct buffer_head *bh;
		int ret;

		bh = __find_get_block(bdev, block, size);
		if (bh)
			return bh;

		ret = grow_buffers(bdev, block, size, gfp);
		if (ret < 0)
			return NULL;
	}
}

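/*
 * The relationship between dirty buffers and dirty pages:
 *
 * Whenever a page has any dirty buffers, the page's dirty bit is set, and
 * the page is tagged dirty in the page cache.
 *
 * At all times, the dirtiness of the buffers represents the dirtiness of
 * subsections of the page.  If the page has buffers, the page dirty bit is
 * merely a hint about the true dirty state.
 *
 * When a page is set dirty in its entirety, all its buffers are marked dirty
 * (if the page has buffers).
 *
 * When a buffer is marked dirty, its page is dirtied, but the page's other
 * buffers are not.
 *
 * Also.  When blockdev buffers are explicitly read with bread(), they
 * individually become uptodate.  But their backing page remains not
 * uptodate - even if all of its buffers are uptodate.  A subsequent
 * block_read_full_page() against that page will discover all the uptodate
 * buffers, will set the page uptodate and will perform no I/O.
 */

/**
 * mark_buffer_dirty - mark a buffer_head as needing writeout
 * @bh: the buffer_head to mark dirty
 *
 * mark_buffer_dirty() will set the dirty bit against the buffer, then set
 * its backing page dirty, then tag the page as dirty in the page cache
 * and then attach the address_space's inode to its superblock's dirty
 * inode list.
 */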
void mark_buffer_dirty(struct buffer_head *bh)
{
	WARN_ON_ONCE(!buffer_uptodate(bh));

	trace_block_dirty_buffer(bh);

	/*
	 * Very *carefully* optimize the it-is-already-dirty case.
	 *
	 * Don't let the final "is it dirty" escape to before we
	 * perhaps modified the buffer.
	 */
	if (buffer_dirty(bh)) {
		smp_mb();
		if (buffer_dirty(bh))
			return;
	}

	if (!test_set_buffer_dirty(bh)) {
		struct page *page = bh->b_page;
		struct address_space *mapping = NULL;

		lock_page_memcg(page);
		if (!TestSetPageDirty(page)) {
			mapping = page_mapping(page);
			if (mapping)
				__set_page_dirty(page, mapping, 0);
		}
		unlock_page_memcg(page);
		if (mapping)
			__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
	}
}
EXPORT_SYMBOL(mark_buffer_dirty);

void mark_buffer_write_io_error(struct buffer_head *bh)
{
	set_buffer_write_io_error(bh);
	/* FIXME: do we need to set this in both places? */
	if (bh->b_page && bh->b_page->mapping)
		mapping_set_error(bh->b_page->mapping, -EIO);
	if (bh->b_assoc_map)
		mapping_set_error(bh->b_assoc_map, -EIO);
}
EXPORT_SYMBOL(mark_buffer_write_io_error);

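/*
 * Decrement a buffer_head's reference count.  If all buffers against a page
 * have zero reference count, are clean and unlocked, and if the page is clean
 * and unlocked then try_to_free_buffers() may strip the buffers from the page
 * in preparation for freeing it (sometimes, rarely, buffers are removed from
 * a page but it ends up not being freed, and buffers may later be reattached).
 */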
void __brelse(struct buffer_head * buf)
{
	if (atomic_read(&buf->b_count)) {
		put_bh(buf);
		return;
	}
	WARN(1, KERN_ERR "VFS: brelse: Trying to free free buffer\n");
}
EXPORT_SYMBOL(__brelse);

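/*
 * bforget() is like brelse(), except it discards any
 * potentially dirty data.
 */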
void __bforget(struct buffer_head *bh)
{
	clear_buffer_dirty(bh);
	if (bh->b_assoc_map) {
		struct address_space *buffer_mapping = bh->b_page->mapping;

		spin_lock(&buffer_mapping->private_lock);
		list_del_init(&bh->b_assoc_buffers);
		bh->b_assoc_map = NULL;
		spin_unlock(&buffer_mapping->private_lock);
	}
	__brelse(bh);
}
EXPORT_SYMBOL(__bforget);

static struct buffer_head *__bread_slow(struct buffer_head *bh)
{
	lock_buffer(bh);
	if (buffer_uptodate(bh)) {
		unlock_buffer(bh);
		return bh;
	} else {
		get_bh(bh);
		bh->b_end_io = end_buffer_read_sync;
		submit_bh(REQ_OP_READ, 0, bh);
		wait_on_buffer(bh);
		if (buffer_uptodate(bh))
			return bh;
	}
	brelse(bh);
	return NULL;
}

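/*
 * Per-cpu buffer LRU implementation.  To reduce the cost of __find_get_block().
 * The bhs[] array is sorted - newest buffer is at bhs[0].  Buffers have their
 * refcount elevated by one when they're in an LRU.  A buffer can only appear
 * once in a particular CPU's LRU.  A single buffer can be present in multiple
 * CPU's LRUs at the same time.
 *
 * This is a transparent caching front-end to sb_bread(), sb_getblk() and
 * sb_find_get_block().
 *
 * The LRUs themselves only need locking against invalidate_bh_lrus.  We use
 * a local interrupt disable for that.
 */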
#define BH_LRU_SIZE	16

struct bh_lru {
	struct buffer_head *bhs[BH_LRU_SIZE];
};

static DEFINE_PER_CPU(struct bh_lru, bh_lrus) = {{ NULL }};

#ifdef CONFIG_SMP
#define bh_lru_lock()	local_irq_disable()
#define bh_lru_unlock()	local_irq_enable()
#else
#define bh_lru_lock()	preempt_disable()
#define bh_lru_unlock()	preempt_enable()
#endif

static inline void check_irqs_on(void)
{
#ifdef irqs_disabled
	BUG_ON(irqs_disabled());
#endif
}

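/*
 * Install a buffer_head into this cpu's LRU.  If not already in the LRU, it is
 * inserted at the front, and the buffer_head at the back if any is evicted.
 * Or, if already in the LRU it is moved to the front.
 */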
static void bh_lru_install(struct buffer_head *bh)
{
	struct buffer_head *evictee = bh;
	struct bh_lru *b;
	int i;

	check_irqs_on();
	bh_lru_lock();

	b = this_cpu_ptr(&bh_lrus);
	for (i = 0; i < BH_LRU_SIZE; i++) {
		swap(evictee, b->bhs[i]);
		if (evictee == bh) {
			bh_lru_unlock();
			return;
		}
	}

	get_bh(bh);
	bh_lru_unlock();
	brelse(evictee);
}

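/*
 * Look up the bh in this cpu's LRU.  If it's there, move it to the head.
 */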
static struct buffer_head *
lookup_bh_lru(struct block_device *bdev, sector_t block, unsigned size)
{
	struct buffer_head *ret = NULL;
	unsigned int i;

	check_irqs_on();
	bh_lru_lock();
	for (i = 0; i < BH_LRU_SIZE; i++) {
		struct buffer_head *bh = __this_cpu_read(bh_lrus.bhs[i]);

		if (bh && bh->b_blocknr == block && bh->b_bdev == bdev &&
		    bh->b_size == size) {
			if (i) {
				while (i) {
					__this_cpu_write(bh_lrus.bhs[i],
						__this_cpu_read(bh_lrus.bhs[i - 1]));
					i--;
				}
				__this_cpu_write(bh_lrus.bhs[0], bh);
			}
			get_bh(bh);
			ret = bh;
			break;
		}
	}
	bh_lru_unlock();
	return ret;
}

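/*
 * Perform a pagecache lookup for the matching buffer.  If it's there, refresh
 * it in the LRU and mark it as accessed.  If it is not present then return
 * NULL
 */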
struct buffer_head *
__find_get_block(struct block_device *bdev, sector_t block, unsigned size)
{
	struct buffer_head *bh = lookup_bh_lru(bdev, block, size);

	if (bh == NULL) {
		/* __find_get_block_slow will mark the page accessed */
		bh = __find_get_block_slow(bdev, block);
		if (bh)
			bh_lru_install(bh);
	} else
		touch_buffer(bh);

	return bh;
}
EXPORT_SYMBOL(__find_get_block);

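/*
 * __getblk_gfp() will locate (and, if necessary, create) the buffer_head
 * which corresponds to the passed block_device, block and size. The
 * returned buffer has its reference count incremented.
 *
 * __getblk_gfp() will lock up the machine if grow_dev_page's
 * try_to_free_buffers() attempt is failing.  FIXME, perhaps?
 */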
struct buffer_head *
__getblk_gfp(struct block_device *bdev, sector_t block,
	     unsigned size, gfp_t gfp)
{
	struct buffer_head *bh = __find_get_block(bdev, block, size);

	might_sleep();
	if (bh == NULL)
		bh = __getblk_slow(bdev, block, size, gfp);
	return bh;
}
EXPORT_SYMBOL(__getblk_gfp);

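/*
 * Do async read-ahead on a buffer..
 */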
void __breadahead(struct block_device *bdev, sector_t block, unsigned size)
{
	struct buffer_head *bh = __getblk(bdev, block, size);
	if (likely(bh)) {
		ll_rw_block(REQ_OP_READ, REQ_RAHEAD, 1, &bh);
		brelse(bh);
	}
}
EXPORT_SYMBOL(__breadahead);

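/**
 *  __bread_gfp() - reads a specified block and returns the bh
 *  @bdev: the block_device to read from
 *  @block: number of block
 *  @size: size (in bytes) to read
 *  @gfp: page allocation flag
 *
 *  Reads a specified block, and returns buffer head that contains it.
 *  The page cache can be allocated from non-movable area
 *  not to prevent page migration if you set gfp to zero.
 *  It returns NULL if the block was unreadable.
 */

/*
 * A minimal usage sketch via the __bread() wrapper (assumptions: error
 * handling elided, a 512-byte block size; "blocknr" and "buf" are
 * illustrative placeholders):
 *
 *	struct buffer_head *bh = __bread(bdev, blocknr, 512);
 *	if (bh) {
 *		memcpy(buf, bh->b_data, bh->b_size);
 *		brelse(bh);
 *	}
 */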
struct buffer_head *
__bread_gfp(struct block_device *bdev, sector_t block,
		   unsigned size, gfp_t gfp)
{
	struct buffer_head *bh = __getblk_gfp(bdev, block, size, gfp);

	if (likely(bh) && !buffer_uptodate(bh))
		bh = __bread_slow(bh);
	return bh;
}
EXPORT_SYMBOL(__bread_gfp);

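/*
 * invalidate_bh_lrus() is called rarely - but not only at unmount.
 * This doesn't race because it runs in each cpu either in irq
 * or with preempt disabled.
 */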
static void invalidate_bh_lru(void *arg)
{
	struct bh_lru *b = &get_cpu_var(bh_lrus);
	int i;

	for (i = 0; i < BH_LRU_SIZE; i++) {
		brelse(b->bhs[i]);
		b->bhs[i] = NULL;
	}
	put_cpu_var(bh_lrus);
}

static bool has_bh_in_lru(int cpu, void *dummy)
{
	struct bh_lru *b = per_cpu_ptr(&bh_lrus, cpu);
	int i;

	for (i = 0; i < BH_LRU_SIZE; i++) {
		if (b->bhs[i])
			return true;
	}

	return false;
}

void invalidate_bh_lrus(void)
{
	on_each_cpu_cond(has_bh_in_lru, invalidate_bh_lru, NULL, 1, GFP_KERNEL);
}
EXPORT_SYMBOL_GPL(invalidate_bh_lrus);

void set_bh_page(struct buffer_head *bh,
		struct page *page, unsigned long offset)
{
	bh->b_page = page;
	BUG_ON(offset >= PAGE_SIZE);
	if (PageHighMem(page))
		/*
		 * This catches illegal uses and preserves the offset:
		 */
		bh->b_data = (char *)(0 + offset);
	else
		bh->b_data = page_address(page) + offset;
}
EXPORT_SYMBOL(set_bh_page);

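/*
 * Called when truncating a buffer on a page completely.
 */

/* Bits that are cleared during an invalidate */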
#define BUFFER_FLAGS_DISCARD \
	(1 << BH_Mapped | 1 << BH_New | 1 << BH_Req | \
	 1 << BH_Delay | 1 << BH_Unwritten)

static void discard_buffer(struct buffer_head * bh)
{
	unsigned long b_state, b_state_old;

	lock_buffer(bh);
	clear_buffer_dirty(bh);
	bh->b_bdev = NULL;
	b_state = bh->b_state;
	for (;;) {
		b_state_old = cmpxchg(&bh->b_state, b_state,
				      (b_state & ~BUFFER_FLAGS_DISCARD));
		if (b_state_old == b_state)
			break;
		b_state = b_state_old;
	}
	unlock_buffer(bh);
}

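/**
 * block_invalidatepage - invalidate part or all of a buffer-backed page
 * @page: the page which is affected
 * @offset: start of the range to invalidate
 * @length: length of the range to invalidate
 *
 * block_invalidatepage() is called when all or part of the page has become
 * invalidated by a truncate operation.
 *
 * block_invalidatepage() does not have to release all buffers, but it must
 * ensure that no dirty buffer is left outside @offset and that no I/O
 * is underway against any of the blocks which are outside the truncation
 * point.  Because the caller is about to free (and possibly reuse) those
 * blocks on-disk.
 */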
void block_invalidatepage(struct page *page, unsigned int offset,
			  unsigned int length)
{
	struct buffer_head *head, *bh, *next;
	unsigned int curr_off = 0;
	unsigned int stop = length + offset;

	BUG_ON(!PageLocked(page));
	if (!page_has_buffers(page))
		goto out;

	/*
	 * Check for overflow
	 */
	BUG_ON(stop > PAGE_SIZE || stop < length);

	head = page_buffers(page);
	bh = head;
	do {
		unsigned int next_off = curr_off + bh->b_size;
		next = bh->b_this_page;

		/*
		 * Are we still fully in range ?
		 */
		if (next_off > stop)
			goto out;

		/*
		 * is this block fully invalidated?
		 */
		if (offset <= curr_off)
			discard_buffer(bh);
		curr_off = next_off;
		bh = next;
	} while (bh != head);

	/*
	 * We release buffers only if the entire page is being invalidated.
	 * The get_block cached value has been unconditionally invalidated,
	 * so real IO is not possible anymore.
	 */
	if (length == PAGE_SIZE)
		try_to_release_page(page, 0);
out:
	return;
}
EXPORT_SYMBOL(block_invalidatepage);

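/*
 * We attach and possibly dirty the buffers atomically wrt
 * __set_page_dirty_buffers() via private_lock.  try_to_free_buffers
 * is already excluded via the page lock.
 */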
void create_empty_buffers(struct page *page,
			unsigned long blocksize, unsigned long b_state)
{
	struct buffer_head *bh, *head, *tail;

	head = alloc_page_buffers(page, blocksize, true);
	bh = head;
	do {
		bh->b_state |= b_state;
		tail = bh;
		bh = bh->b_this_page;
	} while (bh);
	tail->b_this_page = head;

	spin_lock(&page->mapping->private_lock);
	if (PageUptodate(page) || PageDirty(page)) {
		bh = head;
		do {
			if (PageDirty(page))
				set_buffer_dirty(bh);
			if (PageUptodate(page))
				set_buffer_uptodate(bh);
			bh = bh->b_this_page;
		} while (bh != head);
	}
	attach_page_buffers(page, head);
	spin_unlock(&page->mapping->private_lock);
}
EXPORT_SYMBOL(create_empty_buffers);

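/**
 * clean_bdev_aliases: clean a range of buffers in block device
 * @bdev: Block device to clean buffers in
 * @block: Start of a range of blocks to clean
 * @len: Number of blocks to clean
 *
 * We are taking a range of blocks for data and we don't want writeback of any
 * buffer-cache aliases starting from return from this function until the
 * moment when something will explicitly mark the buffer dirty (hopefully that
 * will not happen until we will free that block ;-) We don't even need to mark
 * it not-uptodate - nobody can expect anything from a newly allocated buffer
 * anyway.
 *
 * Also..  Note that bforget() doesn't lock the buffer.  So there can be
 * writeout I/O going on against recently-freed buffers.  We don't wait on that
 * I/O in bforget() - it's more efficient to wait on the I/O only if we really
 * need to.  That happens here.
 */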
void clean_bdev_aliases(struct block_device *bdev, sector_t block, sector_t len)
{
	struct inode *bd_inode = bdev->bd_inode;
	struct address_space *bd_mapping = bd_inode->i_mapping;
	struct pagevec pvec;
	pgoff_t index = block >> (PAGE_SHIFT - bd_inode->i_blkbits);
	pgoff_t end;
	int i, count;
	struct buffer_head *bh;
	struct buffer_head *head;

	end = (block + len - 1) >> (PAGE_SHIFT - bd_inode->i_blkbits);
	pagevec_init(&pvec);
	while (pagevec_lookup_range(&pvec, bd_mapping, &index, end)) {
		count = pagevec_count(&pvec);
		for (i = 0; i < count; i++) {
			struct page *page = pvec.pages[i];

			if (!page_has_buffers(page))
				continue;
			/*
			 * We use page lock instead of bd_mapping->private_lock
			 * to pin buffers here since we can afford to sleep and
			 * it scales better than a global spinlock lock.
			 */
			lock_page(page);
			/* Recheck when the page is locked which pins bhs */
			if (!page_has_buffers(page))
				goto unlock_page;
			head = page_buffers(page);
			bh = head;
			do {
				if (!buffer_mapped(bh) || (bh->b_blocknr < block))
					goto next;
				if (bh->b_blocknr >= block + len)
					break;
				clear_buffer_dirty(bh);
				wait_on_buffer(bh);
				clear_buffer_req(bh);
next:
				bh = bh->b_this_page;
			} while (bh != head);
unlock_page:
			unlock_page(page);
		}
		pagevec_release(&pvec);
		cond_resched();
		/* End of range already reached? */
		if (index > end || !index)
			break;
	}
}
EXPORT_SYMBOL(clean_bdev_aliases);

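/*
 * Size is a power-of-two in the range 512..PAGE_SIZE,
 * and the case we care about most is PAGE_SIZE.
 *
 * So this *could* possibly be written with those
 * constraints in mind (relevant mostly if some
 * architecture has a slow bit-scan instruction)
 */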
static inline int block_size_bits(unsigned int blocksize)
{
	return ilog2(blocksize);
}

static struct buffer_head *create_page_buffers(struct page *page, struct inode *inode, unsigned int b_state)
{
	BUG_ON(!PageLocked(page));

	if (!page_has_buffers(page))
		create_empty_buffers(page, 1 << READ_ONCE(inode->i_blkbits),
				     b_state);
	return page_buffers(page);
}

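/*
 * NOTE! All mapped/uptodate combinations are valid:
 *
 *	Mapped	Uptodate	Meaning
 *
 *	No	No		"unknown" - must do get_block()
 *	No	Yes		"hole" - zero-filled
 *	Yes	No		"allocated" - allocated on disk, not read in
 *	Yes	Yes		"valid" - allocated and up-to-date in memory.
 *
 * "Dirty" is valid only with the last case (mapped+uptodate).
 */

/*
 * While block_write_full_page is writing back the dirty buffers under
 * the page lock, whoever dirtied the buffers may decide to clean them
 * again at any time.  The buffer locking below (trylock in the
 * non-blocking case, test_clear_buffer_dirty before marking for async
 * write) handles that race.
 */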
int __block_write_full_page(struct inode *inode, struct page *page,
			get_block_t *get_block, struct writeback_control *wbc,
			bh_end_io_t *handler)
{
	int err;
	sector_t block;
	sector_t last_block;
	struct buffer_head *bh, *head;
	unsigned int blocksize, bbits;
	int nr_underway = 0;
	int write_flags = wbc_to_write_flags(wbc);

	head = create_page_buffers(page, inode,
					(1 << BH_Dirty)|(1 << BH_Uptodate));

	/*
	 * Be very careful.  We have no exclusion from __set_page_dirty_buffers
	 * here, and the (potentially unmapped) buffers may become dirty at
	 * any time.  If a buffer becomes dirty here after we've inspected it
	 * then we just miss that fact, and the page stays dirty.
	 *
	 * Buffers outside i_size may be dirtied by __set_page_dirty_buffers;
	 * handle that here by just cleaning them.
	 */

	bh = head;
	blocksize = bh->b_size;
	bbits = block_size_bits(blocksize);

	block = (sector_t)page->index << (PAGE_SHIFT - bbits);
	last_block = (i_size_read(inode) - 1) >> bbits;

	/*
	 * Get all the dirty buffers mapped to disk addresses and
	 * handle any aliases from the underlying blockdev's mapping.
	 */
	do {
		if (block > last_block) {
			/*
			 * mapped buffers outside i_size will occur, because
			 * this page can be outside i_size when there is a
			 * truncate in progress.
			 */
			/*
			 * The buffer was zeroed by block_write_full_page()
			 */
			clear_buffer_dirty(bh);
			set_buffer_uptodate(bh);
		} else if ((!buffer_mapped(bh) || buffer_delay(bh)) &&
			   buffer_dirty(bh)) {
			WARN_ON(bh->b_size != blocksize);
			err = get_block(inode, block, bh, 1);
			if (err)
				goto recover;
			clear_buffer_delay(bh);
			if (buffer_new(bh)) {
				/* blockdev mappings never come here */
				clear_buffer_new(bh);
				clean_bdev_bh_alias(bh);
			}
		}
		bh = bh->b_this_page;
		block++;
	} while (bh != head);

	do {
		if (!buffer_mapped(bh))
			continue;
		/*
		 * If it's a fully non-blocking write attempt and we cannot
		 * lock the buffer then redirty the page.  Note that this can
		 * potentially cause a busy-wait loop from writeback threads
		 * and kswapd activity, but those code paths have their own
		 * higher-level throttling.
		 */
		if (wbc->sync_mode != WB_SYNC_NONE) {
			lock_buffer(bh);
		} else if (!trylock_buffer(bh)) {
			redirty_page_for_writepage(wbc, page);
			continue;
		}
		if (test_clear_buffer_dirty(bh)) {
			mark_buffer_async_write_endio(bh, handler);
		} else {
			unlock_buffer(bh);
		}
	} while ((bh = bh->b_this_page) != head);

	/*
	 * The page and its buffers are protected by PageWriteback(), so we can
	 * drop the bh refcounts early.
	 */
	BUG_ON(PageWriteback(page));
	set_page_writeback(page);

	do {
		struct buffer_head *next = bh->b_this_page;
		if (buffer_async_write(bh)) {
			submit_bh_wbc(REQ_OP_WRITE, write_flags, bh,
					inode->i_write_hint, wbc);
			nr_underway++;
		}
		bh = next;
	} while (bh != head);
	unlock_page(page);

	err = 0;
done:
	if (nr_underway == 0) {
		/*
		 * The page was marked dirty, but the buffers were
		 * clean.  Someone wrote them back by hand with
		 * ll_rw_block/submit_bh.  A rare case.
		 */
		end_page_writeback(page);

		/*
		 * The page and buffer_heads can be released at any time from
		 * here on.
		 */
	}
	return err;

recover:
	/*
	 * ENOSPC, or some other error.  We may already have added some
	 * blocks to the file, so we need to write these out to avoid
	 * leaking stale data in the file.
	 */
	bh = head;
	/* Recovery: lock and submit the mapped buffers */
	do {
		if (buffer_mapped(bh) && buffer_dirty(bh) &&
		    !buffer_delay(bh)) {
			lock_buffer(bh);
			mark_buffer_async_write_endio(bh, handler);
		} else {
			/*
			 * The buffer may have been set dirty during
			 * attachment to a dirty page.
			 */
			clear_buffer_dirty(bh);
		}
	} while ((bh = bh->b_this_page) != head);
	SetPageError(page);
	BUG_ON(PageWriteback(page));
	mapping_set_error(page->mapping, err);
	set_page_writeback(page);
	do {
		struct buffer_head *next = bh->b_this_page;
		if (buffer_async_write(bh)) {
			clear_buffer_dirty(bh);
			submit_bh_wbc(REQ_OP_WRITE, write_flags, bh,
					inode->i_write_hint, wbc);
			nr_underway++;
		}
		bh = next;
	} while (bh != head);
	unlock_page(page);
	goto done;
}
EXPORT_SYMBOL(__block_write_full_page);

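/*
 * If a page has any new buffers, zero them out here, and mark them uptodate
 * and dirty so they'll be written out (in order to prevent uninitialised
 * block data from leaking). And clear the new bit.
 */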
void page_zero_new_buffers(struct page *page, unsigned from, unsigned to)
{
	unsigned int block_start, block_end;
	struct buffer_head *head, *bh;

	BUG_ON(!PageLocked(page));
	if (!page_has_buffers(page))
		return;

	bh = head = page_buffers(page);
	block_start = 0;
	do {
		block_end = block_start + bh->b_size;

		if (buffer_new(bh)) {
			if (block_end > from && block_start < to) {
				if (!PageUptodate(page)) {
					unsigned start, size;

					start = max(from, block_start);
					size = min(to, block_end) - start;

					zero_user(page, start, size);
					set_buffer_uptodate(bh);
				}

				clear_buffer_new(bh);
				mark_buffer_dirty(bh);
			}
		}

		block_start = block_end;
		bh = bh->b_this_page;
	} while (bh != head);
}
EXPORT_SYMBOL(page_zero_new_buffers);

static void
iomap_to_bh(struct inode *inode, sector_t block, struct buffer_head *bh,
		struct iomap *iomap)
{
	loff_t offset = block << inode->i_blkbits;

	bh->b_bdev = iomap->bdev;

	/*
	 * Block points to offset in file we need to map, iomap contains
	 * the offset at which the map starts. If the map ends before the
	 * current block, then do not map the buffer and let the caller
	 * handle it.
	 */
	BUG_ON(offset >= iomap->offset + iomap->length);

	switch (iomap->type) {
	case IOMAP_HOLE:
		/*
		 * If the buffer is not up to date or beyond the current EOF,
		 * we need to mark it as new to ensure sub-block zeroing is
		 * executed if necessary.
		 */
		if (!buffer_uptodate(bh) ||
		    (offset >= i_size_read(inode)))
			set_buffer_new(bh);
		break;
	case IOMAP_DELALLOC:
		if (!buffer_uptodate(bh) ||
		    (offset >= i_size_read(inode)))
			set_buffer_new(bh);
		set_buffer_uptodate(bh);
		set_buffer_mapped(bh);
		set_buffer_delay(bh);
		break;
	case IOMAP_UNWRITTEN:
		/*
		 * For unwritten regions, we always need to ensure that regions
		 * in the block we are not writing to are zeroed. Mark the
		 * buffer as new to ensure this.
		 */
		set_buffer_new(bh);
		set_buffer_unwritten(bh);
		/* fall through */
	case IOMAP_MAPPED:
		if ((iomap->flags & IOMAP_F_NEW) ||
		    offset >= i_size_read(inode))
			set_buffer_new(bh);
		bh->b_blocknr = (iomap->addr + offset - iomap->offset) >>
				inode->i_blkbits;
		set_buffer_mapped(bh);
		break;
	}
}

int __block_write_begin_int(struct page *page, loff_t pos, unsigned len,
		get_block_t *get_block, struct iomap *iomap)
{
	unsigned from = pos & (PAGE_SIZE - 1);
	unsigned to = from + len;
	struct inode *inode = page->mapping->host;
	unsigned block_start, block_end;
	sector_t block;
	int err = 0;
	unsigned blocksize, bbits;
	struct buffer_head *bh, *head, *wait[2], **wait_bh = wait;

	BUG_ON(!PageLocked(page));
	BUG_ON(from > PAGE_SIZE);
	BUG_ON(to > PAGE_SIZE);
	BUG_ON(from > to);

	head = create_page_buffers(page, inode, 0);
	blocksize = head->b_size;
	bbits = block_size_bits(blocksize);

	block = (sector_t)page->index << (PAGE_SHIFT - bbits);

	for (bh = head, block_start = 0; bh != head || !block_start;
	    block++, block_start = block_end, bh = bh->b_this_page) {
		block_end = block_start + blocksize;
		if (block_end <= from || block_start >= to) {
			if (PageUptodate(page)) {
				if (!buffer_uptodate(bh))
					set_buffer_uptodate(bh);
			}
			continue;
		}
		if (buffer_new(bh))
			clear_buffer_new(bh);
		if (!buffer_mapped(bh)) {
			WARN_ON(bh->b_size != blocksize);
			if (get_block) {
				err = get_block(inode, block, bh, 1);
				if (err)
					break;
			} else {
				iomap_to_bh(inode, block, bh, iomap);
			}

			if (buffer_new(bh)) {
				clean_bdev_bh_alias(bh);
				if (PageUptodate(page)) {
					clear_buffer_new(bh);
					set_buffer_uptodate(bh);
					mark_buffer_dirty(bh);
					continue;
				}
				if (block_end > to || block_start < from)
					zero_user_segments(page,
						to, block_end,
						block_start, from);
				continue;
			}
		}
		if (PageUptodate(page)) {
			if (!buffer_uptodate(bh))
				set_buffer_uptodate(bh);
			continue;
		}
		if (!buffer_uptodate(bh) && !buffer_delay(bh) &&
		    !buffer_unwritten(bh) &&
		     (block_start < from || block_end > to)) {
			ll_rw_block(REQ_OP_READ, 0, 1, &bh);
			*wait_bh++ = bh;
		}
	}
	/*
	 * If we issued read requests - let them complete.
	 */
	while (wait_bh > wait) {
		wait_on_buffer(*--wait_bh);
		if (!buffer_uptodate(*wait_bh))
			err = -EIO;
	}
	if (unlikely(err))
		page_zero_new_buffers(page, from, to);
	return err;
}

int __block_write_begin(struct page *page, loff_t pos, unsigned len,
		get_block_t *get_block)
{
	return __block_write_begin_int(page, pos, len, get_block, NULL);
}
EXPORT_SYMBOL(__block_write_begin);

static int __block_commit_write(struct inode *inode, struct page *page,
		unsigned from, unsigned to)
{
	unsigned block_start, block_end;
	int partial = 0;
	unsigned blocksize;
	struct buffer_head *bh, *head;

	bh = head = page_buffers(page);
	blocksize = bh->b_size;

	block_start = 0;
	do {
		block_end = block_start + blocksize;
		if (block_end <= from || block_start >= to) {
			if (!buffer_uptodate(bh))
				partial = 1;
		} else {
			set_buffer_uptodate(bh);
			mark_buffer_dirty(bh);
		}
		clear_buffer_new(bh);

		block_start = block_end;
		bh = bh->b_this_page;
	} while (bh != head);

	/*
	 * If this is a partial write which happened to make all buffers
	 * uptodate then we can optimize away a bogus readpage() for
	 * the next read(). Here we 'discover' whether the page went
	 * uptodate as a result of this (potentially partial) write.
	 */
	if (!partial)
		SetPageUptodate(page);
	return 0;
}

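/*
 * block_write_begin takes care of the basic task of block allocation and
 * bringing partial write blocks uptodate first.
 *
 * The filesystem needs to handle block truncation upon failure.
 */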
int block_write_begin(struct address_space *mapping, loff_t pos, unsigned len,
		unsigned flags, struct page **pagep, get_block_t *get_block)
{
	pgoff_t index = pos >> PAGE_SHIFT;
	struct page *page;
	int status;

	page = grab_cache_page_write_begin(mapping, index, flags);
	if (!page)
		return -ENOMEM;

	status = __block_write_begin(page, pos, len, get_block);
	if (unlikely(status)) {
		unlock_page(page);
		put_page(page);
		page = NULL;
	}

	*pagep = page;
	return status;
}
EXPORT_SYMBOL(block_write_begin);

void __generic_write_end(struct inode *inode, loff_t pos, unsigned copied,
		struct page *page)
{
	loff_t old_size = inode->i_size;
	bool i_size_changed = false;

	/*
	 * No need to use i_size_read() here, the i_size cannot change under us
	 * because we hold i_rwsem.
	 *
	 * But it's important to update i_size while still holding page lock:
	 * page writeout could otherwise come in and zero beyond i_size.
	 */
	if (pos + copied > inode->i_size) {
		i_size_write(inode, pos + copied);
		i_size_changed = true;
	}

	unlock_page(page);

	if (old_size < pos)
		pagecache_isize_extended(inode, old_size, pos);
	/*
	 * Don't mark the inode dirty under page lock. First, it unnecessarily
	 * makes the holding time of page lock longer. Second, it forces lock
	 * ordering of page lock and transaction start for journaling
	 * filesystems.
	 */
	if (i_size_changed)
		mark_inode_dirty(inode);
}

int block_write_end(struct file *file, struct address_space *mapping,
			loff_t pos, unsigned len, unsigned copied,
			struct page *page, void *fsdata)
{
	struct inode *inode = mapping->host;
	unsigned start;

	start = pos & (PAGE_SIZE - 1);

	if (unlikely(copied < len)) {
		/*
		 * The buffers that were written will now be uptodate, so we
		 * don't have to worry about a readpage reading them and
		 * overwriting a partial write. However if we have encountered
		 * a short write and only partially written into a buffer, it
		 * will not be marked uptodate, so a readpage might come in and
		 * destroy our partial write.
		 *
		 * Do the simplest thing, and just treat any short write to a
		 * non uptodate page as a zero-length write, and force the
		 * caller to redo the whole thing.
		 */
		if (!PageUptodate(page))
			copied = 0;

		page_zero_new_buffers(page, start+copied, start+len);
	}
	flush_dcache_page(page);

	/* This could be a short (even 0-length) commit */
	__block_commit_write(inode, page, start, start+copied);

	return copied;
}
EXPORT_SYMBOL(block_write_end);

int generic_write_end(struct file *file, struct address_space *mapping,
			loff_t pos, unsigned len, unsigned copied,
			struct page *page, void *fsdata)
{
	copied = block_write_end(file, mapping, pos, len, copied, page, fsdata);
	__generic_write_end(mapping->host, pos, copied, page);
	put_page(page);
	return copied;
}
EXPORT_SYMBOL(generic_write_end);

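/*
 * block_is_partially_uptodate checks whether buffers within a page are
 * uptodate or not.
 *
 * Returns true if all buffers which correspond to a file portion
 * we want to read are uptodate.
 */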
int block_is_partially_uptodate(struct page *page, unsigned long from,
					unsigned long count)
{
	unsigned block_start, block_end, blocksize;
	unsigned to;
	struct buffer_head *bh, *head;
	int ret = 1;

	if (!page_has_buffers(page))
		return 0;

	head = page_buffers(page);
	blocksize = head->b_size;
	to = min_t(unsigned, PAGE_SIZE - from, count);
	to = from + to;
	if (from < blocksize && to > PAGE_SIZE - blocksize)
		return 0;

	bh = head;
	block_start = 0;
	do {
		block_end = block_start + blocksize;
		if (block_end > from && block_start < to) {
			if (!buffer_uptodate(bh)) {
				ret = 0;
				break;
			}
			if (block_end >= to)
				break;
		}
		block_start = block_end;
		bh = bh->b_this_page;
	} while (bh != head);

	return ret;
}
EXPORT_SYMBOL(block_is_partially_uptodate);

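/*
 * Generic "read page" function for block devices that have the normal
 * get_block functionality. This is most of the block device filesystems.
 * Reads the page asynchronously --- the unlock_buffer() and
 * set/clear_buffer_uptodate() functions propagate buffer state into the
 * page struct once IO has completed.
 */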
int block_read_full_page(struct page *page, get_block_t *get_block)
{
	struct inode *inode = page->mapping->host;
	sector_t iblock, lblock;
	struct buffer_head *bh, *head, *arr[MAX_BUF_PER_PAGE];
	unsigned int blocksize, bbits;
	int nr, i;
	int fully_mapped = 1;

	head = create_page_buffers(page, inode, 0);
	blocksize = head->b_size;
	bbits = block_size_bits(blocksize);

	iblock = (sector_t)page->index << (PAGE_SHIFT - bbits);
	lblock = (i_size_read(inode)+blocksize-1) >> bbits;
	bh = head;
	nr = 0;
	i = 0;

	do {
		if (buffer_uptodate(bh))
			continue;

		if (!buffer_mapped(bh)) {
			int err = 0;

			fully_mapped = 0;
			if (iblock < lblock) {
				WARN_ON(bh->b_size != blocksize);
				err = get_block(inode, iblock, bh, 0);
				if (err)
					SetPageError(page);
			}
			if (!buffer_mapped(bh)) {
				zero_user(page, i * blocksize, blocksize);
				if (!err)
					set_buffer_uptodate(bh);
				continue;
			}
			/*
			 * get_block() might have updated the buffer
			 * synchronously
			 */
			if (buffer_uptodate(bh))
				continue;
		}
		arr[nr++] = bh;
	} while (i++, iblock++, (bh = bh->b_this_page) != head);

	if (fully_mapped)
		SetPageMappedToDisk(page);

	if (!nr) {
		/*
		 * All buffers are uptodate - we can set the page uptodate
		 * as well. But not if get_block() returned an error.
		 */
		if (!PageError(page))
			SetPageUptodate(page);
		unlock_page(page);
		return 0;
	}

	/* Stage two: lock the buffers */
	for (i = 0; i < nr; i++) {
		bh = arr[i];
		lock_buffer(bh);
		mark_buffer_async_read(bh);
	}

	/*
	 * Stage 3: start the IO.  Check for uptodateness
	 * inside the buffer lock in case another process reading
	 * the underlying blockdev brought it uptodate (the sct fix).
	 */
	for (i = 0; i < nr; i++) {
		bh = arr[i];
		if (buffer_uptodate(bh))
			end_buffer_async_read(bh, 1);
		else
			submit_bh(REQ_OP_READ, 0, bh);
	}
	return 0;
}
EXPORT_SYMBOL(block_read_full_page);

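/* utility function for filesystems that need to do work on expanding
 * truncates.  Uses filesystem pagecache writes to allow the filesystem to
 * deal with the hole.
 */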
int generic_cont_expand_simple(struct inode *inode, loff_t size)
{
	struct address_space *mapping = inode->i_mapping;
	struct page *page;
	void *fsdata;
	int err;

	err = inode_newsize_ok(inode, size);
	if (err)
		goto out;

	err = pagecache_write_begin(NULL, mapping, size, 0,
				    AOP_FLAG_CONT_EXPAND, &page, &fsdata);
	if (err)
		goto out;

	err = pagecache_write_end(NULL, mapping, size, 0, 0, page, fsdata);
	BUG_ON(err > 0);

out:
	return err;
}
EXPORT_SYMBOL(generic_cont_expand_simple);

static int cont_expand_zero(struct file *file, struct address_space *mapping,
			    loff_t pos, loff_t *bytes)
{
	struct inode *inode = mapping->host;
	unsigned int blocksize = i_blocksize(inode);
	struct page *page;
	void *fsdata;
	pgoff_t index, curidx;
	loff_t curpos;
	unsigned zerofrom, offset, len;
	int err = 0;

	index = pos >> PAGE_SHIFT;
	offset = pos & ~PAGE_MASK;

	while (index > (curidx = (curpos = *bytes) >> PAGE_SHIFT)) {
		zerofrom = curpos & ~PAGE_MASK;
		if (zerofrom & (blocksize-1)) {
			*bytes |= (blocksize-1);
			(*bytes)++;
		}
		len = PAGE_SIZE - zerofrom;

		err = pagecache_write_begin(file, mapping, curpos, len, 0,
					    &page, &fsdata);
		if (err)
			goto out;
		zero_user(page, zerofrom, len);
		err = pagecache_write_end(file, mapping, curpos, len, len,
					  page, fsdata);
		if (err < 0)
			goto out;
		BUG_ON(err != len);
		err = 0;

		balance_dirty_pages_ratelimited(mapping);

		if (fatal_signal_pending(current)) {
			err = -EINTR;
			goto out;
		}
	}

	/* page covers the boundary, find the boundary offset */
	if (index == curidx) {
		zerofrom = curpos & ~PAGE_MASK;
		/* if we will expand the thing last block will be filled */
		if (offset <= zerofrom) {
			goto out;
		}
		if (zerofrom & (blocksize-1)) {
			*bytes |= (blocksize-1);
			(*bytes)++;
		}
		len = offset - zerofrom;

		err = pagecache_write_begin(file, mapping, curpos, len, 0,
					    &page, &fsdata);
		if (err)
			goto out;
		zero_user(page, zerofrom, len);
		err = pagecache_write_end(file, mapping, curpos, len, len,
					  page, fsdata);
		if (err < 0)
			goto out;
		BUG_ON(err != len);
		err = 0;
	}
out:
	return err;
}

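/*
 * For moronic filesystems that do not allow holes in file.
 * We may have to extend the file.
 */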
int cont_write_begin(struct file *file, struct address_space *mapping,
			loff_t pos, unsigned len, unsigned flags,
			struct page **pagep, void **fsdata,
			get_block_t *get_block, loff_t *bytes)
{
	struct inode *inode = mapping->host;
	unsigned int blocksize = i_blocksize(inode);
	unsigned int zerofrom;
	int err;

	err = cont_expand_zero(file, mapping, pos, bytes);
	if (err)
		return err;

	zerofrom = *bytes & ~PAGE_MASK;
	if (pos+len > *bytes && zerofrom & (blocksize-1)) {
		*bytes |= (blocksize-1);
		(*bytes)++;
	}

	return block_write_begin(mapping, pos, len, flags, pagep, get_block);
}
EXPORT_SYMBOL(cont_write_begin);

int block_commit_write(struct page *page, unsigned from, unsigned to)
{
	struct inode *inode = page->mapping->host;
	__block_commit_write(inode, page, from, to);
	return 0;
}
EXPORT_SYMBOL(block_commit_write);

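/*
 * block_page_mkwrite() is not allowed to change the file size as it gets
 * called from a page fault handler when a page is first dirtied. Hence we must
 * be careful to check for EOF conditions here. We set the page up correctly
 * for a written page which means we get ENOSPC checking when writing into
 * holes and correct delalloc and unwritten extent mapping on filesystems that
 * support these features.
 *
 * We are not allowed to take the i_mutex here so we have to play games to
 * protect against truncate races as the page could now be beyond EOF.  Because
 * truncate writes the inode size before removing pages, once we have the
 * page lock we can determine safely if the page is beyond EOF. If it is not
 * beyond EOF, then the page is guaranteed safe against truncation until we
 * unlock the page.
 *
 * Direct callers of this function should protect against filesystem freezing
 * using sb_start_pagefault() - sb_end_pagefault() functions.
 */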
int block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
			 get_block_t get_block)
{
	struct page *page = vmf->page;
	struct inode *inode = file_inode(vma->vm_file);
	unsigned long end;
	loff_t size;
	int ret;

	lock_page(page);
	size = i_size_read(inode);
	if ((page->mapping != inode->i_mapping) ||
	    (page_offset(page) > size)) {
		/* We overload EFAULT to mean page got truncated */
		ret = -EFAULT;
		goto out_unlock;
	}

	/* page is wholly or partially inside EOF */
	if (((page->index + 1) << PAGE_SHIFT) > size)
		end = size & ~PAGE_MASK;
	else
		end = PAGE_SIZE;

	ret = __block_write_begin(page, 0, end, get_block);
	if (!ret)
		ret = block_commit_write(page, 0, end);

	if (unlikely(ret < 0))
		goto out_unlock;
	set_page_dirty(page);
	wait_for_stable_page(page);
	return 0;
out_unlock:
	unlock_page(page);
	return ret;
}
EXPORT_SYMBOL(block_page_mkwrite);

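/*
 * nobh_write_begin()'s prereads are special: the buffer_heads are freed
 * immediately, while under the page lock.  So it needs a special end_io
 * handler which does not touch the bh after unlocking it.
 */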
static void end_buffer_read_nobh(struct buffer_head *bh, int uptodate)
{
	__end_buffer_read_notouch(bh, uptodate);
}

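/*
 * Attach the singly-linked list of buffers created by nobh_write_begin, to
 * the page (converting it to circular linked list and taking care of page
 * dirty races).
 */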
static void attach_nobh_buffers(struct page *page, struct buffer_head *head)
{
	struct buffer_head *bh;

	BUG_ON(!PageLocked(page));

	spin_lock(&page->mapping->private_lock);
	bh = head;
	do {
		if (PageDirty(page))
			set_buffer_dirty(bh);
		if (!bh->b_this_page)
			bh->b_this_page = head;
		bh = bh->b_this_page;
	} while (bh != head);
	attach_page_buffers(page, head);
	spin_unlock(&page->mapping->private_lock);
}

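/*
 * On entry, the page is fully not uptodate.
 * On exit the page is fully uptodate in the areas outside (from,to)
 * The filesystem needs to handle block truncation upon failure.
 */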
int nobh_write_begin(struct address_space *mapping,
			loff_t pos, unsigned len, unsigned flags,
			struct page **pagep, void **fsdata,
			get_block_t *get_block)
{
	struct inode *inode = mapping->host;
	const unsigned blkbits = inode->i_blkbits;
	const unsigned blocksize = 1 << blkbits;
	struct buffer_head *head, *bh;
	struct page *page;
	pgoff_t index;
	unsigned from, to;
	unsigned block_in_page;
	unsigned block_start, block_end;
	sector_t block_in_file;
	int nr_reads = 0;
	int ret = 0;
	int is_mapped_to_disk = 1;

	index = pos >> PAGE_SHIFT;
	from = pos & (PAGE_SIZE - 1);
	to = from + len;

	page = grab_cache_page_write_begin(mapping, index, flags);
	if (!page)
		return -ENOMEM;
	*pagep = page;
	*fsdata = NULL;

	if (page_has_buffers(page)) {
		ret = __block_write_begin(page, pos, len, get_block);
		if (unlikely(ret))
			goto out_release;
		return ret;
	}

	if (PageMappedToDisk(page))
		return 0;

	/*
	 * Allocate buffers so that we can keep track of state, and potentially
	 * attach them to the page if an error occurs. In the common case of
	 * no error, they will just be freed again without ever being attached
	 * to the page (which is all OK, because we're under the page lock).
	 *
	 * Be careful: the buffer linked list is a NULL terminated one, rather
	 * than the circular one we're used to.
	 */
	head = alloc_page_buffers(page, blocksize, false);
	if (!head) {
		ret = -ENOMEM;
		goto out_release;
	}

	block_in_file = (sector_t)page->index << (PAGE_SHIFT - blkbits);

	/*
	 * We loop across all blocks in the page, whether or not they are
	 * part of the affected region.  This is so we can discover if the
	 * page is fully mapped-to-disk.
	 */
	for (block_start = 0, block_in_page = 0, bh = head;
		  block_start < PAGE_SIZE;
		  block_in_page++, block_start += blocksize, bh = bh->b_this_page) {
		int create;

		block_end = block_start + blocksize;
		bh->b_state = 0;
		create = 1;
		if (block_start >= to)
			create = 0;
		ret = get_block(inode, block_in_file + block_in_page,
					bh, create);
		if (ret)
			goto failed;
		if (!buffer_mapped(bh))
			is_mapped_to_disk = 0;
		if (buffer_new(bh))
			clean_bdev_bh_alias(bh);
		if (PageUptodate(page)) {
			set_buffer_uptodate(bh);
			continue;
		}
		if (buffer_new(bh) || !buffer_mapped(bh)) {
			zero_user_segments(page, block_start, from,
							to, block_end);
			continue;
		}
		if (buffer_uptodate(bh))
			continue;
		if (block_start < from || block_end > to) {
			lock_buffer(bh);
			bh->b_end_io = end_buffer_read_nobh;
			submit_bh(REQ_OP_READ, 0, bh);
			nr_reads++;
		}
	}

	if (nr_reads) {
		/*
		 * The page is locked, so these buffers are protected from
		 * any VM or truncate activity.
		 */
		for (bh = head; bh; bh = bh->b_this_page) {
			wait_on_buffer(bh);
			if (!buffer_uptodate(bh))
				ret = -EIO;
		}
		if (ret)
			goto failed;
	}

	if (is_mapped_to_disk)
		SetPageMappedToDisk(page);

	*fsdata = head; /* to be released by nobh_write_end */

	return 0;

failed:
	BUG_ON(!ret);

	/*
	 * Error recovery is a bit difficult. We need to zero out blocks that
	 * were newly allocated, and dirty them to ensure they get written out.
	 * Buffers need to be attached to the page at this point, otherwise
	 * the handling of potential IO errors during writeout would be hard
	 * (could try doing synchronous writeout, but what if that fails too?)
	 */
	attach_nobh_buffers(page, head);
	page_zero_new_buffers(page, from, to);

out_release:
	unlock_page(page);
	put_page(page);
	*pagep = NULL;

	return ret;
}
EXPORT_SYMBOL(nobh_write_begin);

int nobh_write_end(struct file *file, struct address_space *mapping,
			loff_t pos, unsigned len, unsigned copied,
			struct page *page, void *fsdata)
{
	struct inode *inode = page->mapping->host;
	struct buffer_head *head = fsdata;
	struct buffer_head *bh;
	BUG_ON(fsdata != NULL && page_has_buffers(page));

	if (unlikely(copied < len) && head)
		attach_nobh_buffers(page, head);
	if (page_has_buffers(page))
		return generic_write_end(file, mapping, pos, len,
					copied, page, fsdata);

	SetPageUptodate(page);
	set_page_dirty(page);
	if (pos+copied > inode->i_size) {
		i_size_write(inode, pos+copied);
		mark_inode_dirty(inode);
	}

	unlock_page(page);
	put_page(page);

	while (head) {
		bh = head;
		head = head->b_this_page;
		free_buffer_head(bh);
	}

	return copied;
}
EXPORT_SYMBOL(nobh_write_end);

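/*
 * nobh_writepage() - based on block_full_write_page() except
 * that it tries to operate without attaching bufferheads to
 * the page.
 */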
int nobh_writepage(struct page *page, get_block_t *get_block,
			struct writeback_control *wbc)
{
	struct inode * const inode = page->mapping->host;
	loff_t i_size = i_size_read(inode);
	const pgoff_t end_index = i_size >> PAGE_SHIFT;
	unsigned offset;
	int ret;

	/* Is the page fully inside i_size? */
	if (page->index < end_index)
		goto out;

	/* Is the page fully outside i_size? (truncate in progress) */
	offset = i_size & (PAGE_SIZE-1);
	if (page->index >= end_index+1 || !offset) {
		/*
		 * The page may have dirty, unmapped buffers.  Make them
		 * freeable here, so the page does not leak.
		 */
#if 0
		/* Not really sure about this  - do we need this ? */
		if (page->mapping->a_ops->invalidatepage)
			page->mapping->a_ops->invalidatepage(page, offset);
#endif
		unlock_page(page);
		return 0; /* don't care */
	}

	/*
	 * The page straddles i_size.  It must be zeroed out on each and every
	 * writepage invocation because it may be mmapped.  "A file is mapped
	 * in multiples of the page size.  For a file that is not a multiple of
	 * the  page size, the remaining memory is zeroed when mapped, and
	 * writes to that region are not written out to the file."
	 */
	zero_user_segment(page, offset, PAGE_SIZE);
out:
	ret = mpage_writepage(page, get_block, wbc);
	if (ret == -EAGAIN)
		ret = __block_write_full_page(inode, page, get_block, wbc,
					      end_buffer_async_write);
	return ret;
}
EXPORT_SYMBOL(nobh_writepage);

int nobh_truncate_page(struct address_space *mapping,
			loff_t from, get_block_t *get_block)
{
	pgoff_t index = from >> PAGE_SHIFT;
	unsigned offset = from & (PAGE_SIZE-1);
	unsigned blocksize;
	sector_t iblock;
	unsigned length, pos;
	struct inode *inode = mapping->host;
	struct page *page;
	struct buffer_head map_bh;
	int err;

	blocksize = i_blocksize(inode);
	length = offset & (blocksize - 1);

	/* Block boundary? Nothing to do */
	if (!length)
		return 0;

	length = blocksize - length;
	iblock = (sector_t)index << (PAGE_SHIFT - inode->i_blkbits);

	page = grab_cache_page(mapping, index);
	err = -ENOMEM;
	if (!page)
		goto out;

	if (page_has_buffers(page)) {
has_buffers:
		unlock_page(page);
		put_page(page);
		return block_truncate_page(mapping, from, get_block);
	}

	/* Find the buffer that contains "offset" */
	pos = blocksize;
	while (offset >= pos) {
		iblock++;
		pos += blocksize;
	}

	map_bh.b_size = blocksize;
	map_bh.b_state = 0;
	err = get_block(inode, iblock, &map_bh, 0);
	if (err)
		goto unlock;
	/* unmapped? It's a hole - nothing to do */
	if (!buffer_mapped(&map_bh))
		goto unlock;

	/* Ok, it's mapped. Make sure it's up-to-date */
	if (!PageUptodate(page)) {
		err = mapping->a_ops->readpage(NULL, page);
		if (err) {
			put_page(page);
			goto out;
		}
		lock_page(page);
		if (!PageUptodate(page)) {
			err = -EIO;
			goto unlock;
		}
		if (page_has_buffers(page))
			goto has_buffers;
	}
	zero_user(page, offset, length);
	set_page_dirty(page);
	err = 0;

unlock:
	unlock_page(page);
	put_page(page);
out:
	return err;
}
EXPORT_SYMBOL(nobh_truncate_page);

int block_truncate_page(struct address_space *mapping,
			loff_t from, get_block_t *get_block)
{
	pgoff_t index = from >> PAGE_SHIFT;
	unsigned offset = from & (PAGE_SIZE-1);
	unsigned blocksize;
	sector_t iblock;
	unsigned length, pos;
	struct inode *inode = mapping->host;
	struct page *page;
	struct buffer_head *bh;
	int err;

	blocksize = i_blocksize(inode);
	length = offset & (blocksize - 1);

	/* Block boundary? Nothing to do */
	if (!length)
		return 0;

	length = blocksize - length;
	iblock = (sector_t)index << (PAGE_SHIFT - inode->i_blkbits);

	page = grab_cache_page(mapping, index);
	err = -ENOMEM;
	if (!page)
		goto out;

	if (!page_has_buffers(page))
		create_empty_buffers(page, blocksize, 0);

	/* Find the buffer that contains "offset" */
	bh = page_buffers(page);
	pos = blocksize;
	while (offset >= pos) {
		bh = bh->b_this_page;
		iblock++;
		pos += blocksize;
	}

	err = 0;
	if (!buffer_mapped(bh)) {
		WARN_ON(bh->b_size != blocksize);
		err = get_block(inode, iblock, bh, 0);
		if (err)
			goto unlock;
		/* unmapped? It's a hole - nothing to do */
		if (!buffer_mapped(bh))
			goto unlock;
	}

	/* Ok, it's mapped. Make sure it's up-to-date */
	if (PageUptodate(page))
		set_buffer_uptodate(bh);

	if (!buffer_uptodate(bh) && !buffer_delay(bh) && !buffer_unwritten(bh)) {
		err = -EIO;
		ll_rw_block(REQ_OP_READ, 0, 1, &bh);
		wait_on_buffer(bh);
		/* Uhhuh. Read error. Complain and punt. */
		if (!buffer_uptodate(bh))
			goto unlock;
	}

	zero_user(page, offset, length);
	mark_buffer_dirty(bh);
	err = 0;

unlock:
	unlock_page(page);
	put_page(page);
out:
	return err;
}
EXPORT_SYMBOL(block_truncate_page);

2919
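/*
 * Illustrative sketch (not part of the original code): block_truncate_page()
 * is normally called from a filesystem's truncate path to zero the tail of
 * the last partial block before the on-disk blocks are freed.  The names
 * "myfs_truncate" and "myfs_get_block" below are hypothetical.
 */
#if 0
static void myfs_truncate(struct inode *inode)
{
	block_truncate_page(inode->i_mapping, inode->i_size, myfs_get_block);
	/* ... then release the blocks beyond the new i_size ... */
}
#endif
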
/*
 * The generic ->writepage function for buffer-backed address_spaces
 */
int block_write_full_page(struct page *page, get_block_t *get_block,
			struct writeback_control *wbc)
{
	struct inode * const inode = page->mapping->host;
	loff_t i_size = i_size_read(inode);
	const pgoff_t end_index = i_size >> PAGE_SHIFT;
	unsigned offset;

	/* Is the page fully inside i_size? */
	if (page->index < end_index)
		return __block_write_full_page(inode, page, get_block, wbc,
					       end_buffer_async_write);

	/* Is the page fully outside i_size? (truncate in progress) */
	offset = i_size & (PAGE_SIZE-1);
	if (page->index >= end_index+1 || !offset) {
		/*
		 * The page may have dirty, unmapped buffers.  For example,
		 * they may have been added in ext3_writepage().  Make them
		 * freeable here, so the page does not leak.
		 */
		do_invalidatepage(page, 0, PAGE_SIZE);
		unlock_page(page);
		return 0; /* don't care */
	}

	/*
	 * The page straddles i_size.  It must be zeroed out on each and every
	 * writepage invocation because it may be mmapped.  "A file is mapped
	 * in multiples of the page size.  For a file that is not a multiple of
	 * the page size, the remaining memory is zeroed when mapped, and
	 * writes to that region are not written out to the file."
	 */
	zero_user_segment(page, offset, PAGE_SIZE);
	return __block_write_full_page(inode, page, get_block, wbc,
				       end_buffer_async_write);
}
EXPORT_SYMBOL(block_write_full_page);

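/*
 * Worked example of the i_size arithmetic above (assuming PAGE_SIZE == 4096):
 * for i_size == 10000, end_index == 2 and offset == 10000 & 4095 == 1808.
 * Pages 0 and 1 are fully inside i_size and are written out whole; pages 3
 * and beyond are fully outside and are invalidated; page 2 straddles i_size,
 * so bytes 1808..4095 are zeroed before it is written.
 */
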
sector_t generic_block_bmap(struct address_space *mapping, sector_t block,
			    get_block_t *get_block)
{
	struct inode *inode = mapping->host;
	struct buffer_head tmp = {
		.b_size = i_blocksize(inode),
	};

	get_block(inode, block, &tmp, 0);
	return tmp.b_blocknr;
}
EXPORT_SYMBOL(generic_block_bmap);

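/*
 * Illustrative sketch (not part of the original code): generic_block_bmap()
 * is meant to back the FIBMAP ioctl via an address_space's ->bmap method,
 * e.g. (hypothetical "myfs_bmap" and "myfs_get_block"):
 */
#if 0
static sector_t myfs_bmap(struct address_space *mapping, sector_t block)
{
	return generic_block_bmap(mapping, block, myfs_get_block);
}
#endif
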
static void end_bio_bh_io_sync(struct bio *bio)
{
	struct buffer_head *bh = bio->bi_private;

	if (unlikely(bio_flagged(bio, BIO_QUIET)))
		set_bit(BH_Quiet, &bh->b_state);

	bh->b_end_io(bh, !bio->bi_status);
	bio_put(bio);
}

/*
 * This allows us to do IO even on the odd last sectors
 * of a device, even if the block size is some multiple
 * of the physical sector size.
 *
 * We'll just truncate the bio to the size of the device,
 * and clear the end of the last page as appropriate if the
 * caller asked for a READ.
 *
 * A bio whose last segment cannot absorb the overrun is left
 * alone, and the IO layer will fail it with -EIO.
 */
void guard_bio_eod(int op, struct bio *bio)
{
	sector_t maxsector;
	struct bio_vec *bvec = bio_last_bvec_all(bio);
	unsigned truncated_bytes;
	struct hd_struct *part;

	rcu_read_lock();
	part = __disk_get_part(bio->bi_disk, bio->bi_partno);
	if (part)
		maxsector = part_nr_sects_read(part);
	else
		maxsector = get_capacity(bio->bi_disk);
	rcu_read_unlock();

	if (!maxsector)
		return;

	/*
	 * If the *whole* IO is past the end of the device,
	 * let it through, and the IO layer will turn it into
	 * an EIO.
	 */
	if (unlikely(bio->bi_iter.bi_sector >= maxsector))
		return;

	maxsector -= bio->bi_iter.bi_sector;
	if (likely((bio->bi_iter.bi_size >> 9) <= maxsector))
		return;

	/* Uhhuh. We've got a bio that straddles the device size! */
	truncated_bytes = bio->bi_iter.bi_size - (maxsector << 9);

	/*
	 * The bio contains more than one segment which spans EOD, just return
	 * and let IO layer turn it into an EIO
	 */
	if (truncated_bytes > bvec->bv_len)
		return;

	/* Truncate the bio.. */
	bio->bi_iter.bi_size -= truncated_bytes;
	bvec->bv_len -= truncated_bytes;

	/* ..and clear the end of the buffer for reads */
	if (op == REQ_OP_READ) {
		struct bio_vec bv;

		mp_bvec_last_segment(bvec, &bv);
		zero_user(bv.bv_page, bv.bv_offset + bv.bv_len,
				truncated_bytes);
	}
}

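/*
 * Worked example (added for illustration): on a device of 8191 sectors, a
 * 4096-byte buffer_head covering the last block issues a bio at sector 8184
 * for 8 sectors.  Only 7 sectors remain, so truncated_bytes = 4096 - (7 << 9)
 * = 512: the bio is shrunk to 3584 bytes and, for a READ, the final 512
 * bytes of the page segment are zero-filled.
 */
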
static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh,
			 enum rw_hint write_hint, struct writeback_control *wbc)
{
	struct bio *bio;

	BUG_ON(!buffer_locked(bh));
	BUG_ON(!buffer_mapped(bh));
	BUG_ON(!bh->b_end_io);
	BUG_ON(buffer_delay(bh));
	BUG_ON(buffer_unwritten(bh));

	/*
	 * Only clear out a write error when rewriting
	 */
	if (test_set_buffer_req(bh) && (op == REQ_OP_WRITE))
		clear_buffer_write_io_error(bh);

	/*
	 * from here on down, it's all bio -- do the initial mapping,
	 * set up the bio, and then complete the io.
	 */
	bio = bio_alloc(GFP_NOIO, 1);

	bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9);
	bio_set_dev(bio, bh->b_bdev);
	bio->bi_write_hint = write_hint;

	bio_add_page(bio, bh->b_page, bh->b_size, bh_offset(bh));
	BUG_ON(bio->bi_iter.bi_size != bh->b_size);

	bio->bi_end_io = end_bio_bh_io_sync;
	bio->bi_private = bh;

	/* Take care of bh's that straddle the end of the device */
	guard_bio_eod(op, bio);

	if (buffer_meta(bh))
		op_flags |= REQ_META;
	if (buffer_prio(bh))
		op_flags |= REQ_PRIO;
	bio_set_op_attrs(bio, op, op_flags);

	if (wbc) {
		wbc_init_bio(wbc, bio);
		wbc_account_io(wbc, bh->b_page, bh->b_size);
	}

	submit_bio(bio);
	return 0;
}

int submit_bh(int op, int op_flags, struct buffer_head *bh)
{
	return submit_bh_wbc(op, op_flags, bh, 0, NULL);
}
EXPORT_SYMBOL(submit_bh);

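/*
 * Illustrative sketch (not part of the original code): a caller typically
 * pairs submit_bh() with a completion handler and wait_on_buffer(), as
 * bh_submit_read() below does for reads:
 */
#if 0
	lock_buffer(bh);
	get_bh(bh);
	bh->b_end_io = end_buffer_read_sync;
	submit_bh(REQ_OP_READ, 0, bh);
	wait_on_buffer(bh);
	if (!buffer_uptodate(bh))
		return -EIO;
#endif
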
/**
 * ll_rw_block: low-level access to block devices (DEPRECATED)
 * @op: whether to %READ or %WRITE
 * @op_flags: req_flag_bits
 * @nr: number of &struct buffer_heads in the array
 * @bhs: array of pointers to &struct buffer_head
 *
 * ll_rw_block() takes an array of pointers to &struct buffer_heads, and
 * requests an I/O operation on them, either a %REQ_OP_READ or a
 * %REQ_OP_WRITE.  @op_flags contains flags modifying the detailed I/O
 * behavior, most notably %REQ_RAHEAD.
 *
 * This function drops any buffer that it cannot get a lock on (with the
 * BH_Lock state bit), any buffer that appears to be clean when doing a write
 * request, and any buffer that appears to be up-to-date when doing read
 * request.  Further it marks as clean buffers that are processed for
 * writing (the buffer cache won't assume that they are actually clean
 * until the buffer gets unlocked).
 *
 * ll_rw_block sets b_end_io to simple completion handler that marks
 * the buffer up-to-date (if appropriate), unlocks the buffer and wakes
 * any waiters.
 *
 * All of the buffers must be for the same device, and must also be a
 * multiple of the current approved size for the device.
 */
void ll_rw_block(int op, int op_flags, int nr, struct buffer_head *bhs[])
{
	int i;

	for (i = 0; i < nr; i++) {
		struct buffer_head *bh = bhs[i];

		if (!trylock_buffer(bh))
			continue;
		if (op == WRITE) {
			if (test_clear_buffer_dirty(bh)) {
				bh->b_end_io = end_buffer_write_sync;
				get_bh(bh);
				submit_bh(op, op_flags, bh);
				continue;
			}
		} else {
			if (!buffer_uptodate(bh)) {
				bh->b_end_io = end_buffer_read_sync;
				get_bh(bh);
				submit_bh(op, op_flags, bh);
				continue;
			}
		}
		unlock_buffer(bh);
	}
}
EXPORT_SYMBOL(ll_rw_block);

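/*
 * Illustrative sketch (not part of the original code): a typical use is
 * opportunistic readahead of several already-looked-up buffers, waiting
 * only for the one that is actually needed:
 */
#if 0
	ll_rw_block(REQ_OP_READ, REQ_RAHEAD, nr, bhs);
	wait_on_buffer(bhs[0]);
	if (!buffer_uptodate(bhs[0]))
		return -EIO;
#endif
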
void write_dirty_buffer(struct buffer_head *bh, int op_flags)
{
	lock_buffer(bh);
	if (!test_clear_buffer_dirty(bh)) {
		unlock_buffer(bh);
		return;
	}
	bh->b_end_io = end_buffer_write_sync;
	get_bh(bh);
	submit_bh(REQ_OP_WRITE, op_flags, bh);
}
EXPORT_SYMBOL(write_dirty_buffer);

/*
 * For a data-integrity writeout, we need to wait upon any in-progress I/O
 * and then start new I/O and then wait upon it.  The caller must have a ref on
 * the buffer_head.
 */
int __sync_dirty_buffer(struct buffer_head *bh, int op_flags)
{
	int ret = 0;

	WARN_ON(atomic_read(&bh->b_count) < 1);
	lock_buffer(bh);
	if (test_clear_buffer_dirty(bh)) {
		get_bh(bh);
		bh->b_end_io = end_buffer_write_sync;
		ret = submit_bh(REQ_OP_WRITE, op_flags, bh);
		wait_on_buffer(bh);
		if (!ret && !buffer_uptodate(bh))
			ret = -EIO;
	} else {
		unlock_buffer(bh);
	}
	return ret;
}
EXPORT_SYMBOL(__sync_dirty_buffer);

int sync_dirty_buffer(struct buffer_head *bh)
{
	return __sync_dirty_buffer(bh, REQ_SYNC);
}
EXPORT_SYMBOL(sync_dirty_buffer);

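/*
 * Illustrative sketch (not part of the original code): metadata-heavy
 * callers commonly dirty a buffer and then force it out synchronously:
 */
#if 0
	mark_buffer_dirty(bh);
	err = sync_dirty_buffer(bh);
	if (err)
		return err;	/* the write failed */
#endif
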
/*
 * try_to_free_buffers() checks if all the buffers on this particular page
 * are unused, and releases them if so.
 *
 * Exclusion against try_to_free_buffers may be obtained by either
 * locking the page or by holding its mapping's private_lock.
 *
 * If the page is dirty but all the buffers are clean then we need to
 * be sure to mark the page clean as well.  This is because the page
 * may be against a block device, and a later reattachment of buffers
 * to a dirty page will set *all* buffers dirty.  Which would corrupt
 * filesystem data on the same device.
 *
 * The same applies to regular filesystem pages: if all the buffers are
 * clean then we set the page clean and proceed.  To do that, we require
 * total exclusion from __set_page_dirty_buffers().  That is obtained with
 * private_lock.
 *
 * try_to_free_buffers() is non-blocking.
 */
static inline int buffer_busy(struct buffer_head *bh)
{
	return atomic_read(&bh->b_count) |
		(bh->b_state & ((1 << BH_Dirty) | (1 << BH_Lock)));
}

static int
drop_buffers(struct page *page, struct buffer_head **buffers_to_free)
{
	struct buffer_head *head = page_buffers(page);
	struct buffer_head *bh;

	bh = head;
	do {
		if (buffer_busy(bh))
			goto failed;
		bh = bh->b_this_page;
	} while (bh != head);

	do {
		struct buffer_head *next = bh->b_this_page;

		if (bh->b_assoc_map)
			__remove_assoc_queue(bh);
		bh = next;
	} while (bh != head);
	*buffers_to_free = head;
	__clear_page_buffers(page);
	return 1;
failed:
	return 0;
}

int try_to_free_buffers(struct page *page)
{
	struct address_space * const mapping = page->mapping;
	struct buffer_head *buffers_to_free = NULL;
	int ret = 0;

	BUG_ON(!PageLocked(page));
	if (PageWriteback(page))
		return 0;

	if (mapping == NULL) {		/* can this still happen? */
		ret = drop_buffers(page, &buffers_to_free);
		goto out;
	}

	spin_lock(&mapping->private_lock);
	ret = drop_buffers(page, &buffers_to_free);

	/*
	 * If the filesystem writes its buffers by hand (eg ext3)
	 * then we can have clean buffers against a dirty page.  We
	 * clean the page here; otherwise the VM will never notice
	 * that the filesystem did any IO at all.
	 *
	 * Also, during truncate, discard_buffer will have marked all
	 * the page's buffers clean.  We discover that here and clean
	 * the page also.
	 *
	 * private_lock must be held over this entire operation in order
	 * to synchronise against __set_page_dirty_buffers and prevent the
	 * dirty bit from being lost.
	 */
	if (ret)
		cancel_dirty_page(page);
	spin_unlock(&mapping->private_lock);
out:
	if (buffers_to_free) {
		struct buffer_head *bh = buffers_to_free;

		do {
			struct buffer_head *next = bh->b_this_page;
			free_buffer_head(bh);
			bh = next;
		} while (bh != buffers_to_free);
	}
	return ret;
}
EXPORT_SYMBOL(try_to_free_buffers);

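/*
 * Illustrative sketch (not part of the original code): filesystems commonly
 * expose this through ->releasepage so the VM can strip buffers when it
 * wants to reclaim or migrate a page.  "myfs_releasepage" is hypothetical.
 */
#if 0
static int myfs_releasepage(struct page *page, gfp_t gfp_mask)
{
	/* a real implementation might first refuse if private state is pinned */
	return try_to_free_buffers(page);
}
#endif
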
/*
 * There are no bdflush tunables left.  But distributions are
 * still running obsolete flush daemons, so we terminate them here.
 *
 * Use of bdflush() is deprecated and will be removed in a future kernel.
 * The `flush-X' kernel threads fully replace bdflush daemons and this call.
 */
SYSCALL_DEFINE2(bdflush, int, func, long, data)
{
	static int msg_count;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	if (msg_count < 5) {
		msg_count++;
		printk(KERN_INFO
			"warning: process `%s' used the obsolete bdflush"
			" system call\n", current->comm);
		printk(KERN_INFO "Fix your initscripts?\n");
	}

	if (func == 1)
		do_exit(0);
	return 0;
}

/*
 * Buffer-head allocation
 */
static struct kmem_cache *bh_cachep __read_mostly;

/*
 * Once the number of bh's in the machine exceeds this level, we start
 * stripping them in writeback.
 */
static unsigned long max_buffer_heads;

int buffer_heads_over_limit;

struct bh_accounting {
	int nr;			/* Number of live bh's */
	int ratelimit;		/* Limit cacheline bouncing */
};

static DEFINE_PER_CPU(struct bh_accounting, bh_accounting) = {0, 0};

static void recalc_bh_state(void)
{
	int i;
	int tot = 0;

	if (__this_cpu_inc_return(bh_accounting.ratelimit) - 1 < 4096)
		return;
	__this_cpu_write(bh_accounting.ratelimit, 0);
	for_each_online_cpu(i)
		tot += per_cpu(bh_accounting, i).nr;
	buffer_heads_over_limit = (tot > max_buffer_heads);
}

struct buffer_head *alloc_buffer_head(gfp_t gfp_flags)
{
	struct buffer_head *ret = kmem_cache_zalloc(bh_cachep, gfp_flags);
	if (ret) {
		INIT_LIST_HEAD(&ret->b_assoc_buffers);
		preempt_disable();
		__this_cpu_inc(bh_accounting.nr);
		recalc_bh_state();
		preempt_enable();
	}
	return ret;
}
EXPORT_SYMBOL(alloc_buffer_head);

void free_buffer_head(struct buffer_head *bh)
{
	BUG_ON(!list_empty(&bh->b_assoc_buffers));
	kmem_cache_free(bh_cachep, bh);
	preempt_disable();
	__this_cpu_dec(bh_accounting.nr);
	recalc_bh_state();
	preempt_enable();
}
EXPORT_SYMBOL(free_buffer_head);

static int buffer_exit_cpu_dead(unsigned int cpu)
{
	int i;
	struct bh_lru *b = &per_cpu(bh_lrus, cpu);

	for (i = 0; i < BH_LRU_SIZE; i++) {
		brelse(b->bhs[i]);
		b->bhs[i] = NULL;
	}
	this_cpu_add(bh_accounting.nr, per_cpu(bh_accounting, cpu).nr);
	per_cpu(bh_accounting, cpu).nr = 0;
	return 0;
}

/**
 * bh_uptodate_or_lock - Test whether the buffer is uptodate
 * @bh: struct buffer_head
 *
 * Return true if the buffer is up-to-date and false,
 * with the buffer locked, if not.
 */
int bh_uptodate_or_lock(struct buffer_head *bh)
{
	if (!buffer_uptodate(bh)) {
		lock_buffer(bh);
		if (!buffer_uptodate(bh))
			return 0;
		unlock_buffer(bh);
	}
	return 1;
}
EXPORT_SYMBOL(bh_uptodate_or_lock);

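/*
 * Illustrative sketch (not part of the original code): the intended calling
 * pattern pairs this with bh_submit_read() below:
 */
#if 0
	if (!bh_uptodate_or_lock(bh)) {
		/* bh is locked here; read it and wait for completion */
		err = bh_submit_read(bh);
		if (err)
			return err;
	}
#endif
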
/**
 * bh_submit_read - Submit a locked buffer for reading
 * @bh: struct buffer_head
 *
 * Returns zero on success and -EIO on error.
 */
int bh_submit_read(struct buffer_head *bh)
{
	BUG_ON(!buffer_locked(bh));

	if (buffer_uptodate(bh)) {
		unlock_buffer(bh);
		return 0;
	}

	get_bh(bh);
	bh->b_end_io = end_buffer_read_sync;
	submit_bh(REQ_OP_READ, 0, bh);
	wait_on_buffer(bh);
	if (buffer_uptodate(bh))
		return 0;
	return -EIO;
}
EXPORT_SYMBOL(bh_submit_read);

void __init buffer_init(void)
{
	unsigned long nrpages;
	int ret;

	bh_cachep = kmem_cache_create("buffer_head",
			sizeof(struct buffer_head), 0,
				(SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|
				SLAB_MEM_SPREAD),
				NULL);

	/*
	 * Limit the bh occupancy to 10% of ZONE_NORMAL
	 */
	nrpages = (nr_free_buffer_pages() * 10) / 100;
	max_buffer_heads = nrpages * (PAGE_SIZE / sizeof(struct buffer_head));
	ret = cpuhp_setup_state_nocalls(CPUHP_FS_BUFF_DEAD, "fs/buffer:dead",
					NULL, buffer_exit_cpu_dead);
	WARN_ON(ret < 0);
}