// SPDX-License-Identifier: GPL-2.0-only
/*
 *  linux/fs/buffer.c
 *
 *  Copyright (C) 1991, 1992, 2002  Linus Torvalds
 */
#include <linux/kernel.h>
#include <linux/sched/signal.h>
#include <linux/syscalls.h>
#include <linux/fs.h>
#include <linux/iomap.h>
#include <linux/mm.h>
#include <linux/percpu.h>
#include <linux/slab.h>
#include <linux/capability.h>
#include <linux/blkdev.h>
#include <linux/file.h>
#include <linux/quotaops.h>
#include <linux/highmem.h>
#include <linux/export.h>
#include <linux/backing-dev.h>
#include <linux/writeback.h>
#include <linux/hash.h>
#include <linux/suspend.h>
#include <linux/buffer_head.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/bio.h>
#include <linux/cpu.h>
#include <linux/bitops.h>
#include <linux/mpage.h>
#include <linux/bit_spinlock.h>
#include <linux/pagevec.h>
#include <linux/sched/mm.h>
#include <trace/events/block.h>
#include <linux/fscrypt.h>

#include "internal.h"

static int fsync_buffers_list(spinlock_t *lock, struct list_head *list);
static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh,
			 enum rw_hint hint, struct writeback_control *wbc);

#define BH_ENTRY(list) list_entry((list), struct buffer_head, b_assoc_buffers)

inline void touch_buffer(struct buffer_head *bh)
{
	trace_block_touch_buffer(bh);
	mark_page_accessed(bh->b_page);
}
EXPORT_SYMBOL(touch_buffer);

void __lock_buffer(struct buffer_head *bh)
{
	wait_on_bit_lock_io(&bh->b_state, BH_Lock, TASK_UNINTERRUPTIBLE);
}
EXPORT_SYMBOL(__lock_buffer);

void unlock_buffer(struct buffer_head *bh)
{
	clear_bit_unlock(BH_Lock, &bh->b_state);
	smp_mb__after_atomic();
	wake_up_bit(&bh->b_state, BH_Lock);
}
EXPORT_SYMBOL(unlock_buffer);
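
/*
 * Illustrative usage (a sketch, not code from this file): exclusive access
 * to a buffer's contents is taken with lock_buffer()/unlock_buffer():
 *
 *	lock_buffer(bh);
 *	... modify bh->b_data ...
 *	unlock_buffer(bh);
 *
 * lock_buffer() is the trylock-then-sleep wrapper in buffer_head.h, which
 * falls back to __lock_buffer() above when the lock is contended.
 */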

/*
 * Returns if the page has dirty or writeback buffers. If all the buffers
 * are unlocked and clean then the PageDirty information is stale. If
 * any of the pages are locked, it is assumed they are locked for IO.
 */
void buffer_check_dirty_writeback(struct page *page,
				  bool *dirty, bool *writeback)
{
	struct buffer_head *head, *bh;
	*dirty = false;
	*writeback = false;

	BUG_ON(!PageLocked(page));

	if (!page_has_buffers(page))
		return;

	if (PageWriteback(page))
		*writeback = true;

	head = page_buffers(page);
	bh = head;
	do {
		if (buffer_locked(bh))
			*writeback = true;

		if (buffer_dirty(bh))
			*dirty = true;

		bh = bh->b_this_page;
	} while (bh != head);
}
EXPORT_SYMBOL(buffer_check_dirty_writeback);

/*
 * Block until a buffer comes unlocked.  This doesn't stop it
 * from being freed - it just insists on functions being
 * called with the buffer locked; the caller must hold a
 * reference to keep the buffer alive across the wait.
 */
void __wait_on_buffer(struct buffer_head *bh)
{
	wait_on_bit_io(&bh->b_state, BH_Lock, TASK_UNINTERRUPTIBLE);
}
EXPORT_SYMBOL(__wait_on_buffer);

static void
__clear_page_buffers(struct page *page)
{
	ClearPagePrivate(page);
	set_page_private(page, 0);
	put_page(page);
}

static void buffer_io_error(struct buffer_head *bh, char *msg)
{
	if (!test_bit(BH_Quiet, &bh->b_state))
		printk_ratelimited(KERN_ERR
			"Buffer I/O error on dev %pg, logical block %llu%s\n",
			bh->b_bdev, (unsigned long long)bh->b_blocknr, msg);
}

/*
 * End-of-IO handler helper function which does not touch the bh after
 * unlocking it.
 * Note: unlock_buffer() itself only touches the lock bit of b_state,
 * nothing else, so it is safe against the bh being reused immediately.
 */
static void __end_buffer_read_notouch(struct buffer_head *bh, int uptodate)
{
	if (uptodate) {
		set_buffer_uptodate(bh);
	} else {
		/* This happens, due to failed read-ahead attempts. */
		clear_buffer_uptodate(bh);
	}
	unlock_buffer(bh);
}

/*
 * Default synchronous end-of-IO handler..  Just mark it up-to-date and
 * unlock the buffer. This is what ll_rw_block uses too.
 */
void end_buffer_read_sync(struct buffer_head *bh, int uptodate)
{
	__end_buffer_read_notouch(bh, uptodate);
	put_bh(bh);
}
EXPORT_SYMBOL(end_buffer_read_sync);

void end_buffer_write_sync(struct buffer_head *bh, int uptodate)
{
	if (uptodate) {
		set_buffer_uptodate(bh);
	} else {
		buffer_io_error(bh, ", lost sync page write");
		mark_buffer_write_io_error(bh);
		clear_buffer_uptodate(bh);
	}
	unlock_buffer(bh);
	put_bh(bh);
}
EXPORT_SYMBOL(end_buffer_write_sync);
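
/*
 * Illustrative sketch (not code from this file): end_buffer_write_sync()
 * is the completion handler used for hand-rolled synchronous writes, e.g.
 *
 *	lock_buffer(bh);
 *	get_bh(bh);
 *	bh->b_end_io = end_buffer_write_sync;
 *	submit_bh(REQ_OP_WRITE, REQ_SYNC, bh);
 *	wait_on_buffer(bh);
 *
 * which is essentially what __sync_dirty_buffer() does.
 */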

/*
 * Various filesystems appear to want __find_get_block to be non-blocking.
 * But it's the page lock which protects the buffers.  To get around this,
 * we get exclusion from try_to_free_buffers with the blockdev mapping's
 * private_lock.
 */
static struct buffer_head *
__find_get_block_slow(struct block_device *bdev, sector_t block)
{
	struct inode *bd_inode = bdev->bd_inode;
	struct address_space *bd_mapping = bd_inode->i_mapping;
	struct buffer_head *ret = NULL;
	pgoff_t index;
	struct buffer_head *bh;
	struct buffer_head *head;
	struct page *page;
	int all_mapped = 1;
	static DEFINE_RATELIMIT_STATE(last_warned, HZ, 1);

	index = block >> (PAGE_SHIFT - bd_inode->i_blkbits);
	page = find_get_page_flags(bd_mapping, index, FGP_ACCESSED);
	if (!page)
		goto out;

	spin_lock(&bd_mapping->private_lock);
	if (!page_has_buffers(page))
		goto out_unlock;
	head = page_buffers(page);
	bh = head;
	do {
		if (!buffer_mapped(bh))
			all_mapped = 0;
		else if (bh->b_blocknr == block) {
			ret = bh;
			get_bh(bh);
			goto out_unlock;
		}
		bh = bh->b_this_page;
	} while (bh != head);

	/*
	 * All of the buffers are mapped, yet none of them matched the
	 * requested block.  This indicates a bug (for example an aliased
	 * mapping with a mismatched block size), so complain about it,
	 * rate-limited.
	 */
	ratelimit_set_flags(&last_warned, RATELIMIT_MSG_ON_RELEASE);
	if (all_mapped && __ratelimit(&last_warned)) {
		printk("__find_get_block_slow() failed. block=%llu, "
		       "b_blocknr=%llu, b_state=0x%08lx, b_size=%zu, "
		       "device %pg blocksize: %d\n",
		       (unsigned long long)block,
		       (unsigned long long)bh->b_blocknr,
		       bh->b_state, bh->b_size, bdev,
		       1 << bd_inode->i_blkbits);
	}
out_unlock:
	spin_unlock(&bd_mapping->private_lock);
	put_page(page);
out:
	return ret;
}

static void end_buffer_async_read(struct buffer_head *bh, int uptodate)
{
	unsigned long flags;
	struct buffer_head *first;
	struct buffer_head *tmp;
	struct page *page;
	int page_uptodate = 1;

	BUG_ON(!buffer_async_read(bh));

	page = bh->b_page;
	if (uptodate) {
		set_buffer_uptodate(bh);
	} else {
		clear_buffer_uptodate(bh);
		buffer_io_error(bh, ", async page read");
		SetPageError(page);
	}

	/*
	 * Be _very_ careful from here on. Bad things can happen if
	 * two buffer heads end IO at almost the same time and both
	 * decide that the page is now completely done.
	 */
	first = page_buffers(page);
	local_irq_save(flags);
	bit_spin_lock(BH_Uptodate_Lock, &first->b_state);
	clear_buffer_async_read(bh);
	unlock_buffer(bh);
	tmp = bh;
	do {
		if (!buffer_uptodate(tmp))
			page_uptodate = 0;
		if (buffer_async_read(tmp)) {
			BUG_ON(!buffer_locked(tmp));
			goto still_busy;
		}
		tmp = tmp->b_this_page;
	} while (tmp != bh);
	bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
	local_irq_restore(flags);

	/*
	 * If none of the buffers had errors and they are all
	 * uptodate then we can set the page uptodate.
	 */
	if (page_uptodate && !PageError(page))
		SetPageUptodate(page);
	unlock_page(page);
	return;

still_busy:
	bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
	local_irq_restore(flags);
	return;
}

struct decrypt_bh_ctx {
	struct work_struct work;
	struct buffer_head *bh;
};

static void decrypt_bh(struct work_struct *work)
{
	struct decrypt_bh_ctx *ctx =
		container_of(work, struct decrypt_bh_ctx, work);
	struct buffer_head *bh = ctx->bh;
	int err;

	err = fscrypt_decrypt_pagecache_blocks(bh->b_page, bh->b_size,
					       bh_offset(bh));
	end_buffer_async_read(bh, err == 0);
	kfree(ctx);
}

/*
 * I/O completion handler for block_read_full_page() - pages
 * which come unlocked at the end of I/O.
 */
static void end_buffer_async_read_io(struct buffer_head *bh, int uptodate)
{
	/* Decrypt if needed */
	if (uptodate && IS_ENABLED(CONFIG_FS_ENCRYPTION) &&
	    IS_ENCRYPTED(bh->b_page->mapping->host) &&
	    S_ISREG(bh->b_page->mapping->host->i_mode)) {
		struct decrypt_bh_ctx *ctx = kmalloc(sizeof(*ctx), GFP_ATOMIC);

		if (ctx) {
			INIT_WORK(&ctx->work, decrypt_bh);
			ctx->bh = bh;
			fscrypt_enqueue_decrypt_work(&ctx->work);
			return;
		}
		uptodate = 0;
	}
	end_buffer_async_read(bh, uptodate);
}

/*
 * Completion handler for block_write_full_page() - pages which are unlocked
 * during I/O, and which have PageWriteback cleared upon I/O completion.
 */
void end_buffer_async_write(struct buffer_head *bh, int uptodate)
{
	unsigned long flags;
	struct buffer_head *first;
	struct buffer_head *tmp;
	struct page *page;

	BUG_ON(!buffer_async_write(bh));

	page = bh->b_page;
	if (uptodate) {
		set_buffer_uptodate(bh);
	} else {
		buffer_io_error(bh, ", lost async page write");
		mark_buffer_write_io_error(bh);
		clear_buffer_uptodate(bh);
		SetPageError(page);
	}

	first = page_buffers(page);
	local_irq_save(flags);
	bit_spin_lock(BH_Uptodate_Lock, &first->b_state);

	clear_buffer_async_write(bh);
	unlock_buffer(bh);
	tmp = bh->b_this_page;
	while (tmp != bh) {
		if (buffer_async_write(tmp)) {
			BUG_ON(!buffer_locked(tmp));
			goto still_busy;
		}
		tmp = tmp->b_this_page;
	}
	bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
	local_irq_restore(flags);
	end_page_writeback(page);
	return;

still_busy:
	bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
	local_irq_restore(flags);
	return;
}
EXPORT_SYMBOL(end_buffer_async_write);

/*
 * If a page's buffers are under async readin (end_buffer_async_read
 * completion) then there is a possibility that another thread of
 * control could lock one of the buffers after it has completed
 * but while some of the other buffers have not completed.  This
 * locked buffer would confuse end_buffer_async_read() into not unlocking
 * the page.  So the absence of BH_Async_Read tells end_buffer_async_read()
 * that this buffer is not under async I/O.
 *
 * The page comes unlocked when it has no locked buffer_async buffers
 * left.
 *
 * PageLocked prevents anyone starting new async I/O reads any of
 * the buffers.
 *
 * PageWriteback is used to prevent simultaneous writeout of the same
 * page.
 *
 * PageLocked prevents anyone from starting writeback of a page which is
 * under read I/O (PageWriteback is only ever set against a locked page).
 */
static void mark_buffer_async_read(struct buffer_head *bh)
{
	bh->b_end_io = end_buffer_async_read_io;
	set_buffer_async_read(bh);
}

static void mark_buffer_async_write_endio(struct buffer_head *bh,
					  bh_end_io_t *handler)
{
	bh->b_end_io = handler;
	set_buffer_async_write(bh);
}

void mark_buffer_async_write(struct buffer_head *bh)
{
	mark_buffer_async_write_endio(bh, end_buffer_async_write);
}
EXPORT_SYMBOL(mark_buffer_async_write);

/*
 * fs/buffer.c contains helper functions for buffer-backed address space's
 * fsync functions.  A common requirement for buffer-based filesystems is
 * that certain data from the backing blockdev needs to be written out for
 * a successful fsync().  For example, ext2 indirect blocks need to be
 * written back and waited upon before fsync() returns.
 *
 * private_list/private_lock/b_assoc_buffers let a filesystem attach such
 * "associated" buffers to an inode's mapping; fsync_buffers_list() then
 * writes out and waits upon that list at fsync time.
 *
 * The locking here is subtle: try_to_free_buffers() removes buffers from
 * their controlling inode's queue when they are being freed, but it
 * operates against the *blockdev* mapping at that time, not against the
 * S_ISREG file which depends on those buffers.  So the private_list of any
 * mapping is always protected by the private_lock of the backing blockdev
 * mapping, which in turn requires that all buffers on a given
 * ->private_list be backed by the same blockdev.
 */

/*
 * The buffer's backing address_space's private_lock must be held.
 */
static void __remove_assoc_queue(struct buffer_head *bh)
{
	list_del_init(&bh->b_assoc_buffers);
	WARN_ON(!bh->b_assoc_map);
	bh->b_assoc_map = NULL;
}

int inode_has_buffers(struct inode *inode)
{
	return !list_empty(&inode->i_data.private_list);
}

/*
 * osync is designed to support O_SYNC io.  It waits synchronously for
 * all already-submitted IO to complete, but does not queue any new
 * writes to the disk.
 *
 * To do O_SYNC writes, just wait after submitting io over the whole range
 * of buffers in the list and wait upon buffers after their IO has
 * completed.
 */
static int osync_buffers_list(spinlock_t *lock, struct list_head *list)
{
	struct buffer_head *bh;
	struct list_head *p;
	int err = 0;

	spin_lock(lock);
repeat:
	list_for_each_prev(p, list) {
		bh = BH_ENTRY(p);
		if (buffer_locked(bh)) {
			get_bh(bh);
			spin_unlock(lock);
			wait_on_buffer(bh);
			if (!buffer_uptodate(bh))
				err = -EIO;
			brelse(bh);
			spin_lock(lock);
			goto repeat;
		}
	}
	spin_unlock(lock);
	return err;
}

void emergency_thaw_bdev(struct super_block *sb)
{
	while (sb->s_bdev && !thaw_bdev(sb->s_bdev, sb))
		printk(KERN_WARNING "Emergency Thaw on %pg\n", sb->s_bdev);
}

/**
 * sync_mapping_buffers - write out & wait upon a mapping's "associated" buffers
 * @mapping: the mapping which wants those buffers written
 *
 * Starts I/O against the buffers at mapping->private_list, and waits upon
 * that I/O.
 *
 * Basically, this is a convenience function for fsync().
 * @mapping is a file or directory which needs those buffers to be written for
 * a successful fsync().
 */
int sync_mapping_buffers(struct address_space *mapping)
{
	struct address_space *buffer_mapping = mapping->private_data;

	if (buffer_mapping == NULL || list_empty(&mapping->private_list))
		return 0;

	return fsync_buffers_list(&buffer_mapping->private_lock,
					&mapping->private_list);
}
EXPORT_SYMBOL(sync_mapping_buffers);
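
/*
 * Illustrative sketch (not code from this file): a simple filesystem's
 * ->fsync() can be built on sync_mapping_buffers(), roughly as
 * __generic_file_fsync() does:
 *
 *	err = file_write_and_wait_range(file, start, end);
 *	if (err)
 *		return err;
 *	inode_lock(inode);
 *	ret = sync_mapping_buffers(inode->i_mapping);
 *	...
 *	inode_unlock(inode);
 */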

/*
 * Called when we've recently written block `bblock', and it is known that
 * `bblock' was for a buffer_boundary() buffer.  This means that the block at
 * `bblock + 1' is probably a dirty indirect block.  Hunt it down and, if it's
 * dirty, schedule it for IO.  So that indirects merge nicely with their data.
 */
void write_boundary_block(struct block_device *bdev,
			sector_t bblock, unsigned blocksize)
{
	struct buffer_head *bh = __find_get_block(bdev, bblock + 1, blocksize);
	if (bh) {
		if (buffer_dirty(bh))
			ll_rw_block(REQ_OP_WRITE, 0, 1, &bh);
		put_bh(bh);
	}
}

void mark_buffer_dirty_inode(struct buffer_head *bh, struct inode *inode)
{
	struct address_space *mapping = inode->i_mapping;
	struct address_space *buffer_mapping = bh->b_page->mapping;

	mark_buffer_dirty(bh);
	if (!mapping->private_data) {
		mapping->private_data = buffer_mapping;
	} else {
		BUG_ON(mapping->private_data != buffer_mapping);
	}
	if (!bh->b_assoc_map) {
		spin_lock(&buffer_mapping->private_lock);
		list_move_tail(&bh->b_assoc_buffers,
				&mapping->private_list);
		bh->b_assoc_map = mapping;
		spin_unlock(&buffer_mapping->private_lock);
	}
}
EXPORT_SYMBOL(mark_buffer_dirty_inode);

/*
 * Mark the page dirty, and set it dirty in the page cache, and mark the inode
 * dirty.
 *
 * If warn is true, then emit a warning if the page is not uptodate and has
 * not been truncated.
 *
 * The caller must hold lock_page_memcg().
 */
void __set_page_dirty(struct page *page, struct address_space *mapping,
			     int warn)
{
	unsigned long flags;

	xa_lock_irqsave(&mapping->i_pages, flags);
	if (page->mapping) {	/* Race with truncate? */
		WARN_ON_ONCE(warn && !PageUptodate(page));
		account_page_dirtied(page, mapping);
		__xa_set_mark(&mapping->i_pages, page_index(page),
				PAGECACHE_TAG_DIRTY);
	}
	xa_unlock_irqrestore(&mapping->i_pages, flags);
}
EXPORT_SYMBOL_GPL(__set_page_dirty);

/*
 * Add a page to the dirty page list.
 *
 * It is a sad fact of life that this function is called from several places
 * deeply under spinlocking.  It may not sleep.
 *
 * If the page has buffers, the uptodate buffers are set dirty, to preserve
 * dirty-state coherency between the page and the buffers.
 *
 * The buffers are dirtied before the page is dirtied.  There's a small race
 * window in which a writepage caller may see the page cleanness but not the
 * buffer dirtiness.  That's fine.  If this code were to set the page dirty
 * before the buffers, a concurrent writepage caller could clear the page
 * dirty bit, see a bunch of clean buffers and we'd end up with dirty buffers
 * attached to a clean page.
 */
int __set_page_dirty_buffers(struct page *page)
{
	int newly_dirty;
	struct address_space *mapping = page_mapping(page);

	if (unlikely(!mapping))
		return !TestSetPageDirty(page);

	spin_lock(&mapping->private_lock);
	if (page_has_buffers(page)) {
		struct buffer_head *head = page_buffers(page);
		struct buffer_head *bh = head;

		do {
			set_buffer_dirty(bh);
			bh = bh->b_this_page;
		} while (bh != head);
	}
	/*
	 * Lock out page->mem_cgroup migration to keep PageDirty
	 * synchronized with per-memcg dirty page counters.
	 */
	lock_page_memcg(page);
	newly_dirty = !TestSetPageDirty(page);
	spin_unlock(&mapping->private_lock);

	if (newly_dirty)
		__set_page_dirty(page, mapping, 1);

	unlock_page_memcg(page);

	if (newly_dirty)
		__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);

	return newly_dirty;
}
EXPORT_SYMBOL(__set_page_dirty_buffers);

/*
 * Write out and wait upon a list of buffers.
 *
 * We have conflicting pressures: we want to make sure that all
 * initially dirty buffers get waited on, but that any subsequently
 * dirtied buffers don't.  After all, we don't want fsync to last
 * forever if somebody is actively writing to the file.
 *
 * Do this in two main stages: first we copy dirty buffers to a
 * temporary inode list, queueing the writes as we go.  Then we clean
 * up, waiting for those writes to complete.
 *
 * During this second stage, any subsequent updates to the file may end
 * up refiling the buffer on the original inode's dirty list again, so
 * there is a chance we will end up with a buffer queued for write but
 * not yet completed on that list.  So, as a final cleanup we go through
 * the osync code to catch these locked, dirty buffers without requeuing
 * any newly dirty buffers for write.
 */
static int fsync_buffers_list(spinlock_t *lock, struct list_head *list)
{
	struct buffer_head *bh;
	struct list_head tmp;
	struct address_space *mapping;
	int err = 0, err2;
	struct blk_plug plug;

	INIT_LIST_HEAD(&tmp);
	blk_start_plug(&plug);

	spin_lock(lock);
	while (!list_empty(list)) {
		bh = BH_ENTRY(list->next);
		mapping = bh->b_assoc_map;
		__remove_assoc_queue(bh);

		/* Avoid race with mark_buffer_dirty_inode() which does
		 * a lockless check and we rely on seeing the dirty bit */
		smp_mb();
		if (buffer_dirty(bh) || buffer_locked(bh)) {
			list_add(&bh->b_assoc_buffers, &tmp);
			bh->b_assoc_map = mapping;
			if (buffer_dirty(bh)) {
				get_bh(bh);
				spin_unlock(lock);
				/*
				 * Ensure any pending I/O completes so that
				 * write_dirty_buffer() actually writes the
				 * current contents - it is a noop if I/O is
				 * still in flight on potentially older
				 * contents.
				 */
				write_dirty_buffer(bh, REQ_SYNC);

				/*
				 * Kick off IO for the previous mapping. Note
				 * that we will not run the very last mapping,
				 * wait_on_buffer() will do that for us
				 * through sync_buffer().
				 */
				brelse(bh);
				spin_lock(lock);
			}
		}
	}

	spin_unlock(lock);
	blk_finish_plug(&plug);
	spin_lock(lock);

	while (!list_empty(&tmp)) {
		bh = BH_ENTRY(tmp.prev);
		get_bh(bh);
		mapping = bh->b_assoc_map;
		__remove_assoc_queue(bh);
		/* Avoid race with mark_buffer_dirty_inode() which does
		 * a lockless check and we rely on seeing the dirty bit */
		smp_mb();
		if (buffer_dirty(bh)) {
			list_add(&bh->b_assoc_buffers,
				 &mapping->private_list);
			bh->b_assoc_map = mapping;
		}
		spin_unlock(lock);
		wait_on_buffer(bh);
		if (!buffer_uptodate(bh))
			err = -EIO;
		brelse(bh);
		spin_lock(lock);
	}

	spin_unlock(lock);
	err2 = osync_buffers_list(lock, list);
	if (err)
		return err;
	else
		return err2;
}

/*
 * Invalidate any and all dirty buffers on a given inode.  All we do here
 * is detach the buffers from the inode's associated-buffer list; nothing
 * is written out and nothing is freed.
 */
void invalidate_inode_buffers(struct inode *inode)
{
	if (inode_has_buffers(inode)) {
		struct address_space *mapping = &inode->i_data;
		struct list_head *list = &mapping->private_list;
		struct address_space *buffer_mapping = mapping->private_data;

		spin_lock(&buffer_mapping->private_lock);
		while (!list_empty(list))
			__remove_assoc_queue(BH_ENTRY(list->next));
		spin_unlock(&buffer_mapping->private_lock);
	}
}
EXPORT_SYMBOL(invalidate_inode_buffers);

/*
 * Remove any clean buffers from the inode's buffer list.  This is called
 * when we're trying to free the inode itself.  Those buffers can pin it.
 *
 * Returns true if all buffers were removed.
 */
int remove_inode_buffers(struct inode *inode)
{
	int ret = 1;

	if (inode_has_buffers(inode)) {
		struct address_space *mapping = &inode->i_data;
		struct list_head *list = &mapping->private_list;
		struct address_space *buffer_mapping = mapping->private_data;

		spin_lock(&buffer_mapping->private_lock);
		while (!list_empty(list)) {
			struct buffer_head *bh = BH_ENTRY(list->next);
			if (buffer_dirty(bh)) {
				ret = 0;
				break;
			}
			__remove_assoc_queue(bh);
		}
		spin_unlock(&buffer_mapping->private_lock);
	}
	return ret;
}

/*
 * Create the appropriate buffers when given a page for data area and
 * the size of each buffer.. Use the bh->b_this_page linked list to
 * follow the buffers created.  Return NULL if unable to create more
 * buffers.
 *
 * The retry flag is used to differentiate async IO (paging, swapping)
 * which may not fail from ordinary buffer allocations.
 */
struct buffer_head *alloc_page_buffers(struct page *page, unsigned long size,
		bool retry)
{
	struct buffer_head *bh, *head;
	gfp_t gfp = GFP_NOFS | __GFP_ACCOUNT;
	long offset;
	struct mem_cgroup *memcg;

	if (retry)
		gfp |= __GFP_NOFAIL;

	memcg = get_mem_cgroup_from_page(page);
	memalloc_use_memcg(memcg);

	head = NULL;
	offset = PAGE_SIZE;
	while ((offset -= size) >= 0) {
		bh = alloc_buffer_head(gfp);
		if (!bh)
			goto no_grow;

		bh->b_this_page = head;
		bh->b_blocknr = -1;
		head = bh;

		bh->b_size = size;

		/* Link the buffer to its page */
		set_bh_page(bh, page, offset);
	}
out:
	memalloc_unuse_memcg();
	mem_cgroup_put(memcg);
	return head;
/*
 * In case anything failed, we just free everything we got.
 */
no_grow:
	if (head) {
		do {
			bh = head;
			head = head->b_this_page;
			free_buffer_head(bh);
		} while (head);
	}

	goto out;
}
EXPORT_SYMBOL_GPL(alloc_page_buffers);

static inline void
link_dev_buffers(struct page *page, struct buffer_head *head)
{
	struct buffer_head *bh, *tail;

	bh = head;
	do {
		tail = bh;
		bh = bh->b_this_page;
	} while (bh);
	tail->b_this_page = head;
	attach_page_buffers(page, head);
}

static sector_t blkdev_max_block(struct block_device *bdev, unsigned int size)
{
	sector_t retval = ~((sector_t)0);
	loff_t sz = i_size_read(bdev->bd_inode);

	if (sz) {
		unsigned int sizebits = blksize_bits(size);
		retval = (sz >> sizebits);
	}
	return retval;
}

/*
 * Initialise the state of a blockdev page's buffers.
 */
static sector_t
init_page_buffers(struct page *page, struct block_device *bdev,
			sector_t block, int size)
{
	struct buffer_head *head = page_buffers(page);
	struct buffer_head *bh = head;
	int uptodate = PageUptodate(page);
	sector_t end_block = blkdev_max_block(I_BDEV(bdev->bd_inode), size);

	do {
		if (!buffer_mapped(bh)) {
			bh->b_end_io = NULL;
			bh->b_private = NULL;
			bh->b_bdev = bdev;
			bh->b_blocknr = block;
			if (uptodate)
				set_buffer_uptodate(bh);
			if (block < end_block)
				set_buffer_mapped(bh);
		}
		block++;
		bh = bh->b_this_page;
	} while (bh != head);

	/*
	 * Caller needs to validate requested block against end of device.
	 */
	return end_block;
}

/*
 * Create the page-cache page that contains the requested block.
 *
 * This is used purely for blockdev mappings.
 */
static int
grow_dev_page(struct block_device *bdev, sector_t block,
	      pgoff_t index, int size, int sizebits, gfp_t gfp)
{
	struct inode *inode = bdev->bd_inode;
	struct page *page;
	struct buffer_head *bh;
	sector_t end_block;
	int ret = 0;
	gfp_t gfp_mask;

	gfp_mask = mapping_gfp_constraint(inode->i_mapping, ~__GFP_FS) | gfp;

	/*
	 * XXX: __getblk_slow() can not really deal with failure and
	 * will endlessly loop on improvised global reclaim.  Prefer
	 * looping in the allocator rather than here, at least that
	 * code knows what it's doing.
	 */
	gfp_mask |= __GFP_NOFAIL;

	page = find_or_create_page(inode->i_mapping, index, gfp_mask);

	BUG_ON(!PageLocked(page));

	if (page_has_buffers(page)) {
		bh = page_buffers(page);
		if (bh->b_size == size) {
			end_block = init_page_buffers(page, bdev,
						(sector_t)index << sizebits,
						size);
			goto done;
		}
		if (!try_to_free_buffers(page))
			goto failed;
	}

	/*
	 * Allocate some buffers for this page
	 */
	bh = alloc_page_buffers(page, size, true);

	/*
	 * Link the page to the buffers and initialise them.  Take the
	 * lock to be atomic wrt __find_get_block(), which does not
	 * run under the page lock.
	 */
	spin_lock(&inode->i_mapping->private_lock);
	link_dev_buffers(page, bh);
	end_block = init_page_buffers(page, bdev, (sector_t)index << sizebits,
			size);
	spin_unlock(&inode->i_mapping->private_lock);
done:
	ret = (block < end_block) ? 1 : -ENXIO;
failed:
	unlock_page(page);
	put_page(page);
	return ret;
}

/*
 * Create buffers for the specified block device block's page.  If
 * that page was dirty, the buffers are set dirty also.
 */
static int
grow_buffers(struct block_device *bdev, sector_t block, int size, gfp_t gfp)
{
	pgoff_t index;
	int sizebits;

	sizebits = -1;
	do {
		sizebits++;
	} while ((size << sizebits) < PAGE_SIZE);

	index = block >> sizebits;

	/*
	 * Check for a block which wants to lie outside our maximum possible
	 * pagecache index.  (this comparison is done using sector_t types).
	 */
	if (unlikely(index != block >> sizebits)) {
		printk(KERN_ERR "%s: requested out-of-range block %llu for "
			"device %pg\n",
			__func__, (unsigned long long)block,
			bdev);
		return -EIO;
	}

	/* Create a page with the proper size buffers.. */
	return grow_dev_page(bdev, block, index, size, sizebits, gfp);
}

static struct buffer_head *
__getblk_slow(struct block_device *bdev, sector_t block,
	     unsigned size, gfp_t gfp)
{
	/* Size must be multiple of hard sectorsize */
	if (unlikely(size & (bdev_logical_block_size(bdev)-1) ||
			(size < 512 || size > PAGE_SIZE))) {
		printk(KERN_ERR "getblk(): invalid block size %d requested\n",
					size);
		printk(KERN_ERR "logical block size: %d\n",
					bdev_logical_block_size(bdev));

		dump_stack();
		return NULL;
	}

	for (;;) {
		struct buffer_head *bh;
		int ret;

		bh = __find_get_block(bdev, block, size);
		if (bh)
			return bh;

		ret = grow_buffers(bdev, block, size, gfp);
		if (ret < 0)
			return NULL;
	}
}

/*
 * The relationship between dirty buffers and dirty pages:
 *
 * Whenever a page has any dirty buffers, the page's dirty bit is set, and
 * the page is tagged dirty in the page cache.
 *
 * At all times, the dirtiness of the buffers represents the dirtiness of
 * subsections of the page.  If the page has buffers, the page dirty bit is
 * merely a hint about the true dirty state.
 *
 * When a page is set dirty in its entirety, all its buffers are marked dirty
 * (if the page has buffers).
 *
 * When a buffer is marked dirty, its page is dirtied, but the page's other
 * buffers are not.
 *
 * Also.  When blockdev buffers are explicitly read with bread(), they
 * individually become uptodate.  But their backing page remains not
 * uptodate - even if all of its buffers are uptodate.  A subsequent
 * block_read_full_page() against that page will discover all the uptodate
 * buffers, will set the page uptodate and will perform no I/O.
 */

/**
 * mark_buffer_dirty - mark a buffer_head as needing writeout
 * @bh: the buffer_head to mark dirty
 *
 * mark_buffer_dirty() will set the dirty bit against the buffer, then set
 * its backing page dirty, then tag the page as dirty in the page cache
 * and then attach the address_space's inode to its superblock's dirty
 * inode list.
 *
 * mark_buffer_dirty() is atomic.  It takes bh->b_page->mapping->private_lock,
 * i_pages lock and mapping->host->i_lock.
 */
void mark_buffer_dirty(struct buffer_head *bh)
{
	WARN_ON_ONCE(!buffer_uptodate(bh));

	trace_block_dirty_buffer(bh);

	/*
	 * Very *carefully* optimize the it-is-already-dirty case.
	 *
	 * Don't let the final "is it dirty" escape to before we
	 * perhaps modified the buffer.
	 */
	if (buffer_dirty(bh)) {
		smp_mb();
		if (buffer_dirty(bh))
			return;
	}

	if (!test_set_buffer_dirty(bh)) {
		struct page *page = bh->b_page;
		struct address_space *mapping = NULL;

		lock_page_memcg(page);
		if (!TestSetPageDirty(page)) {
			mapping = page_mapping(page);
			if (mapping)
				__set_page_dirty(page, mapping, 0);
		}
		unlock_page_memcg(page);
		if (mapping)
			__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
	}
}
EXPORT_SYMBOL(mark_buffer_dirty);
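
/*
 * Illustrative sketch (not code from this file): the typical metadata
 * update pattern in a buffer-head based filesystem is
 *
 *	struct buffer_head *bh = sb_bread(sb, block);	// read the block
 *	if (!bh)
 *		return -EIO;
 *	memcpy(bh->b_data + off, src, len);		// modify it
 *	mark_buffer_dirty(bh);				// schedule writeout
 *	brelse(bh);					// drop the reference
 */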

void mark_buffer_write_io_error(struct buffer_head *bh)
{
	set_buffer_write_io_error(bh);
	/* FIXME: do we need to set this in both places? */
	if (bh->b_page && bh->b_page->mapping)
		mapping_set_error(bh->b_page->mapping, -EIO);
	if (bh->b_assoc_map)
		mapping_set_error(bh->b_assoc_map, -EIO);
}
EXPORT_SYMBOL(mark_buffer_write_io_error);

/*
 * Decrement a buffer_head's reference count.  If all buffers against a page
 * have zero reference count, are clean and unlocked, and if the page is clean
 * and unlocked then try_to_free_buffers() may strip the buffers from the page
 * in preparation for freeing it (sometimes, rarely, buffers are removed from
 * a page but it ends up not being freed, and buffers may later be reattached).
 */
void __brelse(struct buffer_head *buf)
{
	if (atomic_read(&buf->b_count)) {
		put_bh(buf);
		return;
	}
	WARN(1, KERN_ERR "VFS: brelse: Trying to free free buffer\n");
}
EXPORT_SYMBOL(__brelse);

/*
 * bforget() is like brelse(), except it discards any
 * potentially dirty data.
 */
void __bforget(struct buffer_head *bh)
{
	clear_buffer_dirty(bh);
	if (bh->b_assoc_map) {
		struct address_space *buffer_mapping = bh->b_page->mapping;

		spin_lock(&buffer_mapping->private_lock);
		list_del_init(&bh->b_assoc_buffers);
		bh->b_assoc_map = NULL;
		spin_unlock(&buffer_mapping->private_lock);
	}
	__brelse(bh);
}
EXPORT_SYMBOL(__bforget);

static struct buffer_head *__bread_slow(struct buffer_head *bh)
{
	lock_buffer(bh);
	if (buffer_uptodate(bh)) {
		unlock_buffer(bh);
		return bh;
	} else {
		get_bh(bh);
		bh->b_end_io = end_buffer_read_sync;
		submit_bh(REQ_OP_READ, 0, bh);
		wait_on_buffer(bh);
		if (buffer_uptodate(bh))
			return bh;
	}
	brelse(bh);
	return NULL;
}

/*
 * Per-cpu buffer LRU implementation.  To reduce the cost of __find_get_block().
 * The bhs[] array is sorted - newest buffer is at bhs[0].  Buffers have their
 * refcount elevated by one when they're in an LRU.  A buffer can only appear
 * once in a particular CPU's LRU.  A single buffer can be present in multiple
 * CPU's LRUs at the same time.
 *
 * This is a transparent caching front-end to sb_bread(), sb_getblk() and
 * sb_find_get_block().
 *
 * The LRUs themselves only need locking against invalidate_bh_lrus.  We use
 * a local interrupt disable for that.
 */
#define BH_LRU_SIZE	16

struct bh_lru {
	struct buffer_head *bhs[BH_LRU_SIZE];
};

static DEFINE_PER_CPU(struct bh_lru, bh_lrus) = {{ NULL }};

#ifdef CONFIG_SMP
#define bh_lru_lock()	local_irq_disable()
#define bh_lru_unlock()	local_irq_enable()
#else
#define bh_lru_lock()	preempt_disable()
#define bh_lru_unlock()	preempt_enable()
#endif

static inline void check_irqs_on(void)
{
#ifdef irqs_disabled
	BUG_ON(irqs_disabled());
#endif
}

/*
 * Install a buffer_head into this cpu's LRU.  If not already in the LRU, it
 * is inserted at the front and the buffer_head at the back (if any) is
 * evicted.  If it was already present, the swap loop just rotates it to
 * the front.
 */
static void bh_lru_install(struct buffer_head *bh)
{
	struct buffer_head *evictee = bh;
	struct bh_lru *b;
	int i;

	check_irqs_on();
	bh_lru_lock();

	b = this_cpu_ptr(&bh_lrus);
	for (i = 0; i < BH_LRU_SIZE; i++) {
		swap(evictee, b->bhs[i]);
		if (evictee == bh) {
			bh_lru_unlock();
			return;
		}
	}

	get_bh(bh);
	bh_lru_unlock();
	brelse(evictee);
}

/*
 * Look up the bh in this cpu's LRU.  If it's there, move it to the head.
 */
static struct buffer_head *
lookup_bh_lru(struct block_device *bdev, sector_t block, unsigned size)
{
	struct buffer_head *ret = NULL;
	unsigned int i;

	check_irqs_on();
	bh_lru_lock();
	for (i = 0; i < BH_LRU_SIZE; i++) {
		struct buffer_head *bh = __this_cpu_read(bh_lrus.bhs[i]);

		if (bh && bh->b_blocknr == block && bh->b_bdev == bdev &&
		    bh->b_size == size) {
			if (i) {
				while (i) {
					__this_cpu_write(bh_lrus.bhs[i],
						__this_cpu_read(bh_lrus.bhs[i - 1]));
					i--;
				}
				__this_cpu_write(bh_lrus.bhs[0], bh);
			}
			get_bh(bh);
			ret = bh;
			break;
		}
	}
	bh_lru_unlock();
	return ret;
}

/*
 * Perform a pagecache lookup for the matching buffer.  If it's there, refresh
 * it in the LRU and mark it as accessed.  If it is not present then return
 * NULL
 */
struct buffer_head *
__find_get_block(struct block_device *bdev, sector_t block, unsigned size)
{
	struct buffer_head *bh = lookup_bh_lru(bdev, block, size);

	if (bh == NULL) {
		/* __find_get_block_slow will mark the page accessed */
		bh = __find_get_block_slow(bdev, block);
		if (bh)
			bh_lru_install(bh);
	} else
		touch_buffer(bh);

	return bh;
}
EXPORT_SYMBOL(__find_get_block);

/*
 * __getblk_gfp() will locate (and, if necessary, create) the buffer_head
 * which corresponds to the passed block_device, block and size. The
 * returned buffer has its reference count incremented.
 *
 * __getblk_gfp() will lock up the machine if grow_dev_page's
 * try_to_free_buffers() attempt is failing.  FIXME, perhaps?
 */
struct buffer_head *
__getblk_gfp(struct block_device *bdev, sector_t block,
	     unsigned size, gfp_t gfp)
{
	struct buffer_head *bh = __find_get_block(bdev, block, size);

	might_sleep();
	if (bh == NULL)
		bh = __getblk_slow(bdev, block, size, gfp);
	return bh;
}
EXPORT_SYMBOL(__getblk_gfp);

/*
 * Do async read-ahead on a buffer..
 */
void __breadahead(struct block_device *bdev, sector_t block, unsigned size)
{
	struct buffer_head *bh = __getblk(bdev, block, size);
	if (likely(bh)) {
		ll_rw_block(REQ_OP_READ, REQ_RAHEAD, 1, &bh);
		brelse(bh);
	}
}
EXPORT_SYMBOL(__breadahead);

/**
 *  __bread_gfp() - reads a specified block and returns the bh
 *  @bdev: the block_device to read from
 *  @block: number of block
 *  @size: size (in bytes) to read
 *  @gfp: page allocation flag
 *
 *  Reads a specified block, and returns buffer head that contains it.
 *  The page cache can be allocated from non-movable area
 *  not to prevent page migration if you set gfp to zero.
 *  It returns NULL if the block was unreadable.
 */
struct buffer_head *
__bread_gfp(struct block_device *bdev, sector_t block,
		   unsigned size, gfp_t gfp)
{
	struct buffer_head *bh = __getblk_gfp(bdev, block, size, gfp);

	if (likely(bh) && !buffer_uptodate(bh))
		bh = __bread_slow(bh);
	return bh;
}
EXPORT_SYMBOL(__bread_gfp);
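
/*
 * Illustrative sketch (not code from this file): callers normally use the
 * __bread() wrapper from buffer_head.h, e.g. to read block 0 of a device:
 *
 *	struct buffer_head *bh = __bread(bdev, 0, 4096);
 *	if (!bh)
 *		return -EIO;
 *	... examine bh->b_data ...
 *	brelse(bh);
 */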

/*
 * invalidate_bh_lrus() is called rarely - but not only at unmount.
 * This doesn't race because it runs in each cpu either in irq
 * or with preempt disabled.
 */
static void invalidate_bh_lru(void *arg)
{
	struct bh_lru *b = &get_cpu_var(bh_lrus);
	int i;

	for (i = 0; i < BH_LRU_SIZE; i++) {
		brelse(b->bhs[i]);
		b->bhs[i] = NULL;
	}
	put_cpu_var(bh_lrus);
}

static bool has_bh_in_lru(int cpu, void *dummy)
{
	struct bh_lru *b = per_cpu_ptr(&bh_lrus, cpu);
	int i;

	for (i = 0; i < BH_LRU_SIZE; i++) {
		if (b->bhs[i])
			return true;
	}

	return false;
}

void invalidate_bh_lrus(void)
{
	on_each_cpu_cond(has_bh_in_lru, invalidate_bh_lru, NULL, 1, GFP_KERNEL);
}
EXPORT_SYMBOL_GPL(invalidate_bh_lrus);

void set_bh_page(struct buffer_head *bh,
		struct page *page, unsigned long offset)
{
	bh->b_page = page;
	BUG_ON(offset >= PAGE_SIZE);
	if (PageHighMem(page))
		/*
		 * This catches illegal uses and preserves the offset:
		 */
		bh->b_data = (char *)(0 + offset);
	else
		bh->b_data = page_address(page) + offset;
}
EXPORT_SYMBOL(set_bh_page);

/*
 * Called when truncating a buffer on a page completely.
 */

/* Bits that are cleared during an invalidate */
#define BUFFER_FLAGS_DISCARD \
	(1 << BH_Mapped | 1 << BH_New | 1 << BH_Req | \
	 1 << BH_Delay | 1 << BH_Unwritten)

static void discard_buffer(struct buffer_head *bh)
{
	unsigned long b_state, b_state_old;

	lock_buffer(bh);
	clear_buffer_dirty(bh);
	bh->b_bdev = NULL;
	b_state = bh->b_state;
	for (;;) {
		b_state_old = cmpxchg(&bh->b_state, b_state,
				      (b_state & ~BUFFER_FLAGS_DISCARD));
		if (b_state_old == b_state)
			break;
		b_state = b_state_old;
	}
	unlock_buffer(bh);
}

/**
 * block_invalidatepage - invalidate part or all of a buffer-backed page
 *
 * @page: the page which is affected
 * @offset: start of the range to invalidate
 * @length: length of the range to invalidate
 *
 * block_invalidatepage() is called when all or part of the page has become
 * invalidated by a truncate operation.
 *
 * block_invalidatepage() does not have to release all buffers, but it must
 * ensure that no dirty buffer is left outside @offset and that no I/O
 * is underway against any of the blocks which are outside the truncation
 * point.  Because the caller is about to free (and possibly reuse) those
 * blocks on-disk.
 */
void block_invalidatepage(struct page *page, unsigned int offset,
			  unsigned int length)
{
	struct buffer_head *head, *bh, *next;
	unsigned int curr_off = 0;
	unsigned int stop = length + offset;

	BUG_ON(!PageLocked(page));
	if (!page_has_buffers(page))
		goto out;

	/*
	 * Check for overflow
	 */
	BUG_ON(stop > PAGE_SIZE || stop < length);

	head = page_buffers(page);
	bh = head;
	do {
		unsigned int next_off = curr_off + bh->b_size;
		next = bh->b_this_page;

		/*
		 * Are we still fully in range ?
		 */
		if (next_off > stop)
			goto out;

		/*
		 * is this block fully invalidated?
		 */
		if (offset <= curr_off)
			discard_buffer(bh);
		curr_off = next_off;
		bh = next;
	} while (bh != head);

	/*
	 * We release buffers only if the entire page is being invalidated.
	 * The get_block cached value has been unconditionally invalidated,
	 * so real IO is not possible anymore.
	 */
	if (length == PAGE_SIZE)
		try_to_release_page(page, 0);
out:
	return;
}
EXPORT_SYMBOL(block_invalidatepage);

/*
 * We attach and possibly dirty the buffers atomically wrt
 * __set_page_dirty_buffers() via private_lock.  try_to_free_buffers
 * is already excluded via the page lock.
 */
void create_empty_buffers(struct page *page,
			unsigned long blocksize, unsigned long b_state)
{
	struct buffer_head *bh, *head, *tail;

	head = alloc_page_buffers(page, blocksize, true);
	bh = head;
	do {
		bh->b_state |= b_state;
		tail = bh;
		bh = bh->b_this_page;
	} while (bh);
	tail->b_this_page = head;

	spin_lock(&page->mapping->private_lock);
	if (PageUptodate(page) || PageDirty(page)) {
		bh = head;
		do {
			if (PageDirty(page))
				set_buffer_dirty(bh);
			if (PageUptodate(page))
				set_buffer_uptodate(bh);
			bh = bh->b_this_page;
		} while (bh != head);
	}
	attach_page_buffers(page, head);
	spin_unlock(&page->mapping->private_lock);
}
EXPORT_SYMBOL(create_empty_buffers);

/**
 * clean_bdev_aliases: clean a range of buffers in block device
 * @bdev: Block device to clean buffers in
 * @block: Start of a range of blocks to clean
 * @len: Number of blocks to clean
 *
 * We are taking a range of blocks for data and we don't want writeback of any
 * buffer-cache aliases starting from return from this function and until the
 * moment when something will explicitly mark the buffer dirty (hopefully that
 * will not happen until we will free that block ;-) We don't even need to mark
 * it not-uptodate - nobody can expect anything from a newly allocated buffer
 * anyway. We used to use unmap_buffer() for such invalidation, but that was
 * wrong. We definitely don't want to mark the alias unmapped, for example - it
 * would confuse anyone who might pick it with bread() afterwards...
 *
 * Also..  Note that bforget() doesn't lock the buffer.  So there can be
 * writeout I/O going on against recently-freed buffers.  We don't wait on that
 * I/O in bforget() - it's more efficient to wait on the I/O only if we really
 * need to.  That happens here.
 */
void clean_bdev_aliases(struct block_device *bdev, sector_t block, sector_t len)
{
	struct inode *bd_inode = bdev->bd_inode;
	struct address_space *bd_mapping = bd_inode->i_mapping;
	struct pagevec pvec;
	pgoff_t index = block >> (PAGE_SHIFT - bd_inode->i_blkbits);
	pgoff_t end;
	int i, count;
	struct buffer_head *bh;
	struct buffer_head *head;

	end = (block + len - 1) >> (PAGE_SHIFT - bd_inode->i_blkbits);
	pagevec_init(&pvec);
	while (pagevec_lookup_range(&pvec, bd_mapping, &index, end)) {
		count = pagevec_count(&pvec);
		for (i = 0; i < count; i++) {
			struct page *page = pvec.pages[i];

			if (!page_has_buffers(page))
				continue;
			/*
			 * We use page lock instead of bd_mapping->private_lock
			 * to pin buffers here since we can afford to sleep and
			 * it scales better than a global spinlock lock.
			 */
			lock_page(page);
			/* Recheck when the page is locked which pins bhs */
			if (!page_has_buffers(page))
				goto unlock_page;
			head = page_buffers(page);
			bh = head;
			do {
				if (!buffer_mapped(bh) || (bh->b_blocknr < block))
					goto next;
				if (bh->b_blocknr >= block + len)
					break;
				clear_buffer_dirty(bh);
				wait_on_buffer(bh);
				clear_buffer_req(bh);
next:
				bh = bh->b_this_page;
			} while (bh != head);
unlock_page:
			unlock_page(page);
		}
		pagevec_release(&pvec);
		cond_resched();
		/* End of range already reached? */
		if (index > end || !index)
			break;
	}
}
EXPORT_SYMBOL(clean_bdev_aliases);

/*
 * Size is a power-of-two in the range 512..PAGE_SIZE,
 * and the case we care about most is PAGE_SIZE.
 *
 * So this *could* possibly be written with those
 * constraints in mind (relevant mostly if some
 * architecture has a slow bit-scan instruction)
 */
static inline int block_size_bits(unsigned int blocksize)
{
	return ilog2(blocksize);
}

static struct buffer_head *create_page_buffers(struct page *page, struct inode *inode, unsigned int b_state)
{
	BUG_ON(!PageLocked(page));

	if (!page_has_buffers(page))
		create_empty_buffers(page, 1 << READ_ONCE(inode->i_blkbits),
				     b_state);
	return page_buffers(page);
}

/*
 * NOTE! All mapped/uptodate combinations are valid:
 *
 *	Mapped	Uptodate	Meaning
 *
 *	No	No		"unknown" - must do get_block()
 *	No	Yes		"hole" - zero-filled
 *	Yes	No		"allocated" - allocated on disk, not read in
 *	Yes	Yes		"valid" - allocated and up-to-date in memory.
 *
 * "Dirty" is valid only with the last case (mapped+uptodate).
 */

/*
 * While block_write_full_page is writing back the dirty buffers under
 * the page lock, whoever dirtied the buffers may decide to clean them
 * again at any time.  We handle that by only looking at the buffer
 * state inside lock_buffer().
 *
 * If block_write_full_page() is called for regular writeback
 * (wbc->sync_mode == WB_SYNC_NONE) then it will redirty a page which has a
 * locked buffer.   This only can happen if someone has written the buffer
 * directly, with submit_bh().  At the address_space level PageWriteback
 * prevents this contention from occurring.
 *
 * If block_write_full_page() is called with wbc->sync_mode ==
 * WB_SYNC_ALL, the writes are posted using REQ_SYNC; this
 * causes the writes to be flagged as synchronous writes.
 */
int __block_write_full_page(struct inode *inode, struct page *page,
			get_block_t *get_block, struct writeback_control *wbc,
			bh_end_io_t *handler)
{
	int err;
	sector_t block;
	sector_t last_block;
	struct buffer_head *bh, *head;
	unsigned int blocksize, bbits;
	int nr_underway = 0;
	int write_flags = wbc_to_write_flags(wbc);

	head = create_page_buffers(page, inode,
					(1 << BH_Dirty)|(1 << BH_Uptodate));

	/*
	 * Be very careful.  We have no exclusion from __set_page_dirty_buffers
	 * here, and the (potentially unmapped) buffers may become dirty at
	 * any time.  If a buffer becomes dirty here after we've inspected it
	 * then we just miss that fact, and the page stays dirty.
	 *
	 * Buffers outside i_size may be dirtied by __set_page_dirty_buffers;
	 * handle that here by just cleaning them.
	 */

	bh = head;
	blocksize = bh->b_size;
	bbits = block_size_bits(blocksize);

	block = (sector_t)page->index << (PAGE_SHIFT - bbits);
	last_block = (i_size_read(inode) - 1) >> bbits;

	/*
	 * Get all the dirty buffers mapped to disk addresses and
	 * handle any aliases from the underlying blockdev's mapping.
	 */
	do {
		if (block > last_block) {
			/*
			 * mapped buffers outside i_size will occur, because
			 * this page can be outside i_size when there is a
			 * truncate in progress.
			 */
			/*
			 * The buffer was zeroed by block_write_full_page()
			 */
			clear_buffer_dirty(bh);
			set_buffer_uptodate(bh);
		} else if ((!buffer_mapped(bh) || buffer_delay(bh)) &&
			   buffer_dirty(bh)) {
			WARN_ON(bh->b_size != blocksize);
			err = get_block(inode, block, bh, 1);
			if (err)
				goto recover;
			clear_buffer_delay(bh);
			if (buffer_new(bh)) {
				/* blockdev mappings never come here */
				clear_buffer_new(bh);
				clean_bdev_bh_alias(bh);
			}
		}
		bh = bh->b_this_page;
		block++;
	} while (bh != head);

	do {
		if (!buffer_mapped(bh))
			continue;
		/*
		 * If it's a fully non-blocking write attempt and we cannot
		 * lock the buffer then redirty the page.  Note that this can
		 * potentially cause a busy-wait loop from writeback threads
		 * and kswapd activity, but those code paths have their own
		 * higher-level throttling.
		 */
		if (wbc->sync_mode != WB_SYNC_NONE) {
			lock_buffer(bh);
		} else if (!trylock_buffer(bh)) {
			redirty_page_for_writepage(wbc, page);
			continue;
		}
		if (test_clear_buffer_dirty(bh)) {
			mark_buffer_async_write_endio(bh, handler);
		} else {
			unlock_buffer(bh);
		}
	} while ((bh = bh->b_this_page) != head);

	/*
	 * The page and its buffers are protected by PageWriteback(), so we can
	 * drop the bh refcounts early.
	 */
	BUG_ON(PageWriteback(page));
	set_page_writeback(page);

	do {
		struct buffer_head *next = bh->b_this_page;
		if (buffer_async_write(bh)) {
			submit_bh_wbc(REQ_OP_WRITE, write_flags, bh,
					inode->i_write_hint, wbc);
			nr_underway++;
		}
		bh = next;
	} while (bh != head);
	unlock_page(page);

	err = 0;
done:
	if (nr_underway == 0) {
		/*
		 * The page was marked dirty, but the buffers were
		 * clean.  Someone wrote them back by hand with
		 * ll_rw_block/submit_bh.  A rare case.
		 */
		end_page_writeback(page);

		/*
		 * The page and buffer_heads can be released at any time from
		 * here on.
		 */
	}
	return err;

recover:
	/*
	 * ENOSPC, or some other error.  We may already have added some
	 * blocks to the file, so we need to write these out to avoid
	 * exposing stale data.
	 * The page is currently locked and not marked for writeback
	 */
	bh = head;
	/* Recovery: lock and submit the mapped buffers */
	do {
		if (buffer_mapped(bh) && buffer_dirty(bh) &&
		    !buffer_delay(bh)) {
			lock_buffer(bh);
			mark_buffer_async_write_endio(bh, handler);
		} else {
			/*
			 * The buffer may have been set dirty during
			 * attachment to a dirty page.
			 */
			clear_buffer_dirty(bh);
		}
	} while ((bh = bh->b_this_page) != head);
	SetPageError(page);
	BUG_ON(PageWriteback(page));
	mapping_set_error(page->mapping, err);
	set_page_writeback(page);
	do {
		struct buffer_head *next = bh->b_this_page;
		if (buffer_async_write(bh)) {
			clear_buffer_dirty(bh);
			submit_bh_wbc(REQ_OP_WRITE, write_flags, bh,
					inode->i_write_hint, wbc);
			nr_underway++;
		}
		bh = next;
	} while (bh != head);
	unlock_page(page);
	goto done;
}
EXPORT_SYMBOL(__block_write_full_page);

/*
 * If a page has any new buffers, zero them out here, and mark them uptodate
 * and dirty so they'll be written out (in order to prevent uninitialised
 * block data from leaking). And clear the new bit.
 */
void page_zero_new_buffers(struct page *page, unsigned from, unsigned to)
{
	unsigned int block_start, block_end;
	struct buffer_head *head, *bh;

	BUG_ON(!PageLocked(page));
	if (!page_has_buffers(page))
		return;

	bh = head = page_buffers(page);
	block_start = 0;
	do {
		block_end = block_start + bh->b_size;

		if (buffer_new(bh)) {
			if (block_end > from && block_start < to) {
				if (!PageUptodate(page)) {
					unsigned start, size;

					start = max(from, block_start);
					size = min(to, block_end) - start;

					zero_user(page, start, size);
					set_buffer_uptodate(bh);
				}

				clear_buffer_new(bh);
				mark_buffer_dirty(bh);
			}
		}

		block_start = block_end;
		bh = bh->b_this_page;
	} while (bh != head);
}
EXPORT_SYMBOL(page_zero_new_buffers);

static void
iomap_to_bh(struct inode *inode, sector_t block, struct buffer_head *bh,
		struct iomap *iomap)
{
	loff_t offset = block << inode->i_blkbits;

	bh->b_bdev = iomap->bdev;

	/*
	 * Block points to offset in file we need to map, iomap contains
	 * the offset at which the map starts. If the map ends before the
	 * current block, then do not map the buffer and let the caller
	 * handle it.
	 */
	BUG_ON(offset >= iomap->offset + iomap->length);

	switch (iomap->type) {
	case IOMAP_HOLE:
		/*
		 * If the buffer is not up to date or beyond the current EOF,
		 * we need to mark it as new to ensure sub-block zeroing is
		 * executed if necessary.
		 */
		if (!buffer_uptodate(bh) ||
		    (offset >= i_size_read(inode)))
			set_buffer_new(bh);
		break;
	case IOMAP_DELALLOC:
		if (!buffer_uptodate(bh) ||
		    (offset >= i_size_read(inode)))
			set_buffer_new(bh);
		set_buffer_uptodate(bh);
		set_buffer_mapped(bh);
		set_buffer_delay(bh);
		break;
	case IOMAP_UNWRITTEN:
		/*
		 * For unwritten regions, we always need to ensure that regions
		 * in the block we are not writing to are zeroed. Mark the
		 * buffer as new to ensure this.
		 */
		set_buffer_new(bh);
		set_buffer_unwritten(bh);
		/* fall through */
	case IOMAP_MAPPED:
		if ((iomap->flags & IOMAP_F_NEW) ||
		    offset >= i_size_read(inode))
			set_buffer_new(bh);
		bh->b_blocknr = (iomap->addr + offset - iomap->offset) >>
				inode->i_blkbits;
		set_buffer_mapped(bh);
		break;
	}
}

int __block_write_begin_int(struct page *page, loff_t pos, unsigned len,
		get_block_t *get_block, struct iomap *iomap)
{
	unsigned from = pos & (PAGE_SIZE - 1);
	unsigned to = from + len;
	struct inode *inode = page->mapping->host;
	unsigned block_start, block_end;
	sector_t block;
	int err = 0;
	unsigned blocksize, bbits;
	struct buffer_head *bh, *head, *wait[2], **wait_bh = wait;

	BUG_ON(!PageLocked(page));
	BUG_ON(from > PAGE_SIZE);
	BUG_ON(to > PAGE_SIZE);
	BUG_ON(from > to);

	head = create_page_buffers(page, inode, 0);
	blocksize = head->b_size;
	bbits = block_size_bits(blocksize);

	block = (sector_t)page->index << (PAGE_SHIFT - bbits);

	for (bh = head, block_start = 0; bh != head || !block_start;
	    block++, block_start = block_end, bh = bh->b_this_page) {
		block_end = block_start + blocksize;
		if (block_end <= from || block_start >= to) {
			if (PageUptodate(page)) {
				if (!buffer_uptodate(bh))
					set_buffer_uptodate(bh);
			}
			continue;
		}
		if (buffer_new(bh))
			clear_buffer_new(bh);
		if (!buffer_mapped(bh)) {
			WARN_ON(bh->b_size != blocksize);
			if (get_block) {
				err = get_block(inode, block, bh, 1);
				if (err)
					break;
			} else {
				iomap_to_bh(inode, block, bh, iomap);
			}

			if (buffer_new(bh)) {
				clean_bdev_bh_alias(bh);
				if (PageUptodate(page)) {
					clear_buffer_new(bh);
					set_buffer_uptodate(bh);
					mark_buffer_dirty(bh);
					continue;
				}
				if (block_end > to || block_start < from)
					zero_user_segments(page,
						to, block_end,
						block_start, from);
				continue;
			}
		}
		if (PageUptodate(page)) {
			if (!buffer_uptodate(bh))
				set_buffer_uptodate(bh);
			continue;
		}
		if (!buffer_uptodate(bh) && !buffer_delay(bh) &&
		    !buffer_unwritten(bh) &&
		    (block_start < from || block_end > to)) {
			ll_rw_block(REQ_OP_READ, 0, 1, &bh);
			*wait_bh++ = bh;
		}
	}
	/*
	 * If we issued read requests - let them complete.
	 */
	while (wait_bh > wait) {
		wait_on_buffer(*--wait_bh);
		if (!buffer_uptodate(*wait_bh))
			err = -EIO;
	}
	if (unlikely(err))
		page_zero_new_buffers(page, from, to);
	return err;
}

int __block_write_begin(struct page *page, loff_t pos, unsigned len,
		get_block_t *get_block)
{
	return __block_write_begin_int(page, pos, len, get_block, NULL);
}
EXPORT_SYMBOL(__block_write_begin);

static int __block_commit_write(struct inode *inode, struct page *page,
		unsigned from, unsigned to)
{
	unsigned block_start, block_end;
	int partial = 0;
	unsigned blocksize;
	struct buffer_head *bh, *head;

	bh = head = page_buffers(page);
	blocksize = bh->b_size;

	block_start = 0;
	do {
		block_end = block_start + blocksize;
		if (block_end <= from || block_start >= to) {
			if (!buffer_uptodate(bh))
				partial = 1;
		} else {
			set_buffer_uptodate(bh);
			mark_buffer_dirty(bh);
		}
		clear_buffer_new(bh);

		block_start = block_end;
		bh = bh->b_this_page;
	} while (bh != head);

	/*
	 * If this is a partial write which happened to make all buffers
	 * uptodate then we can optimize away a bogus readpage() for
	 * the next read(). Here we 'discover' whether the page went
	 * uptodate as a result of this (potentially partial) write.
	 */
	if (!partial)
		SetPageUptodate(page);
	return 0;
}

/*
 * block_write_begin takes care of the basic task of block allocation and
 * bringing partial write blocks uptodate first.
 *
 * The filesystem needs to handle block truncation upon failure.
 */
int block_write_begin(struct address_space *mapping, loff_t pos, unsigned len,
		unsigned flags, struct page **pagep, get_block_t *get_block)
{
	pgoff_t index = pos >> PAGE_SHIFT;
	struct page *page;
	int status;

	page = grab_cache_page_write_begin(mapping, index, flags);
	if (!page)
		return -ENOMEM;

	status = __block_write_begin(page, pos, len, get_block);
	if (unlikely(status)) {
		unlock_page(page);
		put_page(page);
		page = NULL;
	}

	*pagep = page;
	return status;
}
EXPORT_SYMBOL(block_write_begin);
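
/*
 * Illustrative sketch (hypothetical filesystem code, not from this file):
 * a filesystem wires block_write_begin()/generic_write_end() into its
 * address_space operations, supplying its own get_block callback:
 *
 *	static int foo_write_begin(struct file *file,
 *			struct address_space *mapping, loff_t pos,
 *			unsigned len, unsigned flags,
 *			struct page **pagep, void **fsdata)
 *	{
 *		return block_write_begin(mapping, pos, len, flags, pagep,
 *					 foo_get_block);
 *	}
 *
 * where foo_get_block() is the filesystem's own block-mapping routine.
 */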

int block_write_end(struct file *file, struct address_space *mapping,
			loff_t pos, unsigned len, unsigned copied,
			struct page *page, void *fsdata)
{
	struct inode *inode = mapping->host;
	unsigned start;

	start = pos & (PAGE_SIZE - 1);

	if (unlikely(copied < len)) {
		/*
		 * The buffers that were written will now be uptodate, so we
		 * don't have to worry about a readpage reading them and
		 * overwriting a partial write. However if we have encountered
		 * a short write and only partially written into a buffer, it
		 * will not be marked uptodate, so a readpage might come in and
		 * destroy our partial write.
		 *
		 * Do the simplest thing, and just treat any short write to a
		 * non uptodate page as a zero-length write, and force the
		 * caller to redo the whole thing.
		 */
		if (!PageUptodate(page))
			copied = 0;

		page_zero_new_buffers(page, start+copied, start+len);
	}
	flush_dcache_page(page);

	/* This could be a short (even 0-length) commit */
	__block_commit_write(inode, page, start, start+copied);

	return copied;
}
EXPORT_SYMBOL(block_write_end);

int generic_write_end(struct file *file, struct address_space *mapping,
			loff_t pos, unsigned len, unsigned copied,
			struct page *page, void *fsdata)
{
	struct inode *inode = mapping->host;
	loff_t old_size = inode->i_size;
	bool i_size_changed = false;

	copied = block_write_end(file, mapping, pos, len, copied, page, fsdata);

	/*
	 * No need to use i_size_read() here, the i_size cannot change under us
	 * because we hold i_rwsem.
	 *
	 * But it's important to update i_size while still holding page lock:
	 * page writeout could otherwise come in and zero beyond i_size.
	 */
	if (pos + copied > inode->i_size) {
		i_size_write(inode, pos + copied);
		i_size_changed = true;
	}

	unlock_page(page);
	put_page(page);

	if (old_size < pos)
		pagecache_isize_extended(inode, old_size, pos);
	/*
	 * Don't mark the inode dirty under page lock. First, it unnecessarily
	 * makes the holding time of page lock longer. Second, it forces lock
	 * ordering of page lock and transaction start for journaling
	 * filesystems.
	 */
	if (i_size_changed)
		mark_inode_dirty(inode);
	return copied;
}
EXPORT_SYMBOL(generic_write_end);

/*
 * block_is_partially_uptodate checks whether buffers within a page are
 * uptodate or not.
 *
 * Returns true if all buffers which correspond to a file portion
 * we want to read are uptodate.
 */
int block_is_partially_uptodate(struct page *page, unsigned long from,
					unsigned long count)
{
	unsigned block_start, block_end, blocksize;
	unsigned to;
	struct buffer_head *bh, *head;
	int ret = 1;

	if (!page_has_buffers(page))
		return 0;

	head = page_buffers(page);
	blocksize = head->b_size;
	to = min_t(unsigned, PAGE_SIZE - from, count);
	to = from + to;
	if (from < blocksize && to > PAGE_SIZE - blocksize)
		return 0;

	bh = head;
	block_start = 0;
	do {
		block_end = block_start + blocksize;
		if (block_end > from && block_start < to) {
			if (!buffer_uptodate(bh)) {
				ret = 0;
				break;
			}
			if (block_end >= to)
				break;
		}
		block_start = block_end;
		bh = bh->b_this_page;
	} while (bh != head);

	return ret;
}
EXPORT_SYMBOL(block_is_partially_uptodate);

/*
 * Generic "read page" function for block files, with proper EOF
 * handling.  Pages which are partially uptodate are handled buffer by
 * buffer: already-uptodate buffers are skipped, holes are zero-filled,
 * and only the remaining buffers are read from disk.
 */
int block_read_full_page(struct page *page, get_block_t *get_block)
{
	struct inode *inode = page->mapping->host;
	sector_t iblock, lblock;
	struct buffer_head *bh, *head, *arr[MAX_BUF_PER_PAGE];
	unsigned int blocksize, bbits;
	int nr, i;
	int fully_mapped = 1;

	head = create_page_buffers(page, inode, 0);
	blocksize = head->b_size;
	bbits = block_size_bits(blocksize);

	iblock = (sector_t)page->index << (PAGE_SHIFT - bbits);
	lblock = (i_size_read(inode)+blocksize-1) >> bbits;
	bh = head;
	nr = 0;
	i = 0;

	do {
		if (buffer_uptodate(bh))
			continue;

		if (!buffer_mapped(bh)) {
			int err = 0;

			fully_mapped = 0;
			if (iblock < lblock) {
				WARN_ON(bh->b_size != blocksize);
				err = get_block(inode, iblock, bh, 0);
				if (err)
					SetPageError(page);
			}
			if (!buffer_mapped(bh)) {
				zero_user(page, i * blocksize, blocksize);
				if (!err)
					set_buffer_uptodate(bh);
				continue;
			}
			/*
			 * get_block() might have updated the buffer
			 * synchronously
			 */
			if (buffer_uptodate(bh))
				continue;
		}
		arr[nr++] = bh;
	} while (i++, iblock++, (bh = bh->b_this_page) != head);

	if (fully_mapped)
		SetPageMappedToDisk(page);

	if (!nr) {
		/*
		 * All buffers are uptodate - we can set the page uptodate
		 * as well. But not if get_block() returned an error.
		 */
		if (!PageError(page))
			SetPageUptodate(page);
		unlock_page(page);
		return 0;
	}

	/* Stage two: lock the buffers */
	for (i = 0; i < nr; i++) {
		bh = arr[i];
		lock_buffer(bh);
		mark_buffer_async_read(bh);
	}

	/*
	 * Stage 3: start the IO.  Check for uptodateness
	 * inside the buffer lock in case another process reading
	 * the underlying blockdev brought it uptodate (the sct fix).
	 */
	for (i = 0; i < nr; i++) {
		bh = arr[i];
		if (buffer_uptodate(bh))
			end_buffer_async_read(bh, 1);
		else
			submit_bh(REQ_OP_READ, 0, bh);
	}
	return 0;
}
EXPORT_SYMBOL(block_read_full_page);
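
/*
 * Illustrative sketch (hypothetical code, not from this file): the usual
 * ->readpage implementation for a buffer-head filesystem is a thin wrapper:
 *
 *	static int foo_readpage(struct file *file, struct page *page)
 *	{
 *		return block_read_full_page(page, foo_get_block);
 *	}
 */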

/* utility function for filesystems that need to do work on expanding
 * truncates.  Uses filesystem pagecache writes to allow the filesystem to
 * deal with the hole.
 */
int generic_cont_expand_simple(struct inode *inode, loff_t size)
{
	struct address_space *mapping = inode->i_mapping;
	struct page *page;
	void *fsdata;
	int err;

	err = inode_newsize_ok(inode, size);
	if (err)
		goto out;

	err = pagecache_write_begin(NULL, mapping, size, 0,
				    AOP_FLAG_CONT_EXPAND, &page, &fsdata);
	if (err)
		goto out;

	err = pagecache_write_end(NULL, mapping, size, 0, 0, page, fsdata);
	BUG_ON(err > 0);

out:
	return err;
}
EXPORT_SYMBOL(generic_cont_expand_simple);

static int cont_expand_zero(struct file *file, struct address_space *mapping,
			    loff_t pos, loff_t *bytes)
{
	struct inode *inode = mapping->host;
	unsigned int blocksize = i_blocksize(inode);
	struct page *page;
	void *fsdata;
	pgoff_t index, curidx;
	loff_t curpos;
	unsigned zerofrom, offset, len;
	int err = 0;

	index = pos >> PAGE_SHIFT;
	offset = pos & ~PAGE_MASK;

	while (index > (curidx = (curpos = *bytes)>>PAGE_SHIFT)) {
		zerofrom = curpos & ~PAGE_MASK;
		if (zerofrom & (blocksize-1)) {
			*bytes |= (blocksize-1);
			(*bytes)++;
		}
		len = PAGE_SIZE - zerofrom;

		err = pagecache_write_begin(file, mapping, curpos, len, 0,
					    &page, &fsdata);
		if (err)
			goto out;
		zero_user(page, zerofrom, len);
		err = pagecache_write_end(file, mapping, curpos, len, len,
						page, fsdata);
		if (err < 0)
			goto out;
		BUG_ON(err != len);
		err = 0;

		balance_dirty_pages_ratelimited(mapping);

		if (fatal_signal_pending(current)) {
			err = -EINTR;
			goto out;
		}
	}

	/* page covers the boundary, find the boundary offset */
	if (index == curidx) {
		zerofrom = curpos & ~PAGE_MASK;
		/* if we will expand the thing last block will be filled */
		if (offset <= zerofrom) {
			goto out;
		}
		if (zerofrom & (blocksize-1)) {
			*bytes |= (blocksize-1);
			(*bytes)++;
		}
		len = offset - zerofrom;

		err = pagecache_write_begin(file, mapping, curpos, len, 0,
					    &page, &fsdata);
		if (err)
			goto out;
		zero_user(page, zerofrom, len);
		err = pagecache_write_end(file, mapping, curpos, len, len,
						page, fsdata);
		if (err < 0)
			goto out;
		BUG_ON(err != len);
		err = 0;
	}
out:
	return err;
}

/*
 * For moronic filesystems that do not allow holes in file.
 * We may have to extend the file.
 */
int cont_write_begin(struct file *file, struct address_space *mapping,
			loff_t pos, unsigned len, unsigned flags,
			struct page **pagep, void **fsdata,
			get_block_t *get_block, loff_t *bytes)
{
	struct inode *inode = mapping->host;
	unsigned int blocksize = i_blocksize(inode);
	unsigned int zerofrom;
	int err;

	err = cont_expand_zero(file, mapping, pos, bytes);
	if (err)
		return err;

	zerofrom = *bytes & ~PAGE_MASK;
	if (pos+len > *bytes && zerofrom & (blocksize-1)) {
		*bytes |= (blocksize-1);
		(*bytes)++;
	}

	return block_write_begin(mapping, pos, len, flags, pagep, get_block);
}
EXPORT_SYMBOL(cont_write_begin);

int block_commit_write(struct page *page, unsigned from, unsigned to)
{
	struct inode *inode = page->mapping->host;
	__block_commit_write(inode, page, from, to);
	return 0;
}
EXPORT_SYMBOL(block_commit_write);

/*
 * block_page_mkwrite() is not allowed to change the file size as it gets
 * called from a page fault handler when a page is first dirtied. Hence we must
 * be careful to check for EOF conditions here. We set the page up correctly
 * for a written page which means we get ENOSPC checking when writing into
 * holes and correct delalloc and unwritten extent mapping on filesystems that
 * support these features.
 *
 * We are not allowed to take the i_mutex here so we have to play games to
 * protect against truncate races as the page could now be beyond EOF.  Because
 * truncate writes the inode size before removing pages, once we have the
 * page lock we can determine safely if the page is beyond EOF. If it is not
 * beyond EOF, then the page is guaranteed safe against truncation until we
 * unlock the page.
 *
 * Direct callers of this function should protect against filesystem freezing
 * using sb_start_pagefault() - sb_end_pagefault() functions.
 */
int block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
			 get_block_t get_block)
{
	struct page *page = vmf->page;
	struct inode *inode = file_inode(vma->vm_file);
	unsigned long end;
	loff_t size;
	int ret;

	lock_page(page);
	size = i_size_read(inode);
	if ((page->mapping != inode->i_mapping) ||
	    (page_offset(page) > size)) {
		/* We overload EFAULT to mean page got truncated */
		ret = -EFAULT;
		goto out_unlock;
	}

	/* page is wholly or partially inside EOF */
	if (((page->index + 1) << PAGE_SHIFT) > size)
		end = size & ~PAGE_MASK;
	else
		end = PAGE_SIZE;

	ret = __block_write_begin(page, 0, end, get_block);
	if (!ret)
		ret = block_commit_write(page, 0, end);

	if (unlikely(ret < 0))
		goto out_unlock;
	set_page_dirty(page);
	wait_for_stable_page(page);
	return 0;
out_unlock:
	unlock_page(page);
	return ret;
}
EXPORT_SYMBOL(block_page_mkwrite);
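
/*
 * Illustrative sketch (hypothetical code, not from this file): a
 * filesystem's ->page_mkwrite handler wraps this with freeze protection
 * and converts the errno with block_page_mkwrite_return():
 *
 *	static vm_fault_t foo_page_mkwrite(struct vm_fault *vmf)
 *	{
 *		struct super_block *sb = file_inode(vmf->vma->vm_file)->i_sb;
 *		int err;
 *
 *		sb_start_pagefault(sb);
 *		err = block_page_mkwrite(vmf->vma, vmf, foo_get_block);
 *		sb_end_pagefault(sb);
 *		return block_page_mkwrite_return(err);
 *	}
 */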
2534
2535
2536
2537
2538
2539
2540static void end_buffer_read_nobh(struct buffer_head *bh, int uptodate)
2541{
2542 __end_buffer_read_notouch(bh, uptodate);
2543}
2544
2545
2546
2547
2548
2549
2550static void attach_nobh_buffers(struct page *page, struct buffer_head *head)
2551{
2552 struct buffer_head *bh;
2553
2554 BUG_ON(!PageLocked(page));
2555
2556 spin_lock(&page->mapping->private_lock);
2557 bh = head;
2558 do {
2559 if (PageDirty(page))
2560 set_buffer_dirty(bh);
2561 if (!bh->b_this_page)
2562 bh->b_this_page = head;
2563 bh = bh->b_this_page;
2564 } while (bh != head);
2565 attach_page_buffers(page, head);
2566 spin_unlock(&page->mapping->private_lock);
2567}
2568
2569
2570
2571
2572
2573
2574int nobh_write_begin(struct address_space *mapping,
2575 loff_t pos, unsigned len, unsigned flags,
2576 struct page **pagep, void **fsdata,
2577 get_block_t *get_block)
2578{
2579 struct inode *inode = mapping->host;
2580 const unsigned blkbits = inode->i_blkbits;
2581 const unsigned blocksize = 1 << blkbits;
2582 struct buffer_head *head, *bh;
2583 struct page *page;
2584 pgoff_t index;
2585 unsigned from, to;
2586 unsigned block_in_page;
2587 unsigned block_start, block_end;
2588 sector_t block_in_file;
2589 int nr_reads = 0;
2590 int ret = 0;
2591 int is_mapped_to_disk = 1;
2592
2593 index = pos >> PAGE_SHIFT;
2594 from = pos & (PAGE_SIZE - 1);
2595 to = from + len;
2596
2597 page = grab_cache_page_write_begin(mapping, index, flags);
2598 if (!page)
2599 return -ENOMEM;
2600 *pagep = page;
2601 *fsdata = NULL;
2602
2603 if (page_has_buffers(page)) {
2604 ret = __block_write_begin(page, pos, len, get_block);
2605 if (unlikely(ret))
2606 goto out_release;
2607 return ret;
2608 }
2609
2610 if (PageMappedToDisk(page))
2611 return 0;
2612
2613
2614
2615
2616
2617
2618
2619
2620
2621
2622 head = alloc_page_buffers(page, blocksize, false);
2623 if (!head) {
2624 ret = -ENOMEM;
2625 goto out_release;
2626 }
2627
2628 block_in_file = (sector_t)page->index << (PAGE_SHIFT - blkbits);
2629
2630
2631
2632
2633
2634
2635 for (block_start = 0, block_in_page = 0, bh = head;
2636 block_start < PAGE_SIZE;
2637 block_in_page++, block_start += blocksize, bh = bh->b_this_page) {
2638 int create;
2639
2640 block_end = block_start + blocksize;
2641 bh->b_state = 0;
2642 create = 1;
2643 if (block_start >= to)
2644 create = 0;
2645 ret = get_block(inode, block_in_file + block_in_page,
2646 bh, create);
2647 if (ret)
2648 goto failed;
2649 if (!buffer_mapped(bh))
2650 is_mapped_to_disk = 0;
2651 if (buffer_new(bh))
2652 clean_bdev_bh_alias(bh);
2653 if (PageUptodate(page)) {
2654 set_buffer_uptodate(bh);
2655 continue;
2656 }
2657 if (buffer_new(bh) || !buffer_mapped(bh)) {
2658 zero_user_segments(page, block_start, from,
2659 to, block_end);
2660 continue;
2661 }
2662 if (buffer_uptodate(bh))
2663 continue;
2664 if (block_start < from || block_end > to) {
2665 lock_buffer(bh);
2666 bh->b_end_io = end_buffer_read_nobh;
2667 submit_bh(REQ_OP_READ, 0, bh);
2668 nr_reads++;
2669 }
2670 }
2671
2672 if (nr_reads) {
2673
2674
2675
2676
2677
2678 for (bh = head; bh; bh = bh->b_this_page) {
2679 wait_on_buffer(bh);
2680 if (!buffer_uptodate(bh))
2681 ret = -EIO;
2682 }
2683 if (ret)
2684 goto failed;
2685 }
2686
2687 if (is_mapped_to_disk)
2688 SetPageMappedToDisk(page);
2689
2690 *fsdata = head;
2691
2692 return 0;
2693
2694failed:
2695 BUG_ON(!ret);
2696
2697
2698
2699
2700
2701
2702
2703 attach_nobh_buffers(page, head);
2704 page_zero_new_buffers(page, from, to);
2705
2706out_release:
2707 unlock_page(page);
2708 put_page(page);
2709 *pagep = NULL;
2710
2711 return ret;
2712}
2713EXPORT_SYMBOL(nobh_write_begin);
2714
2715int nobh_write_end(struct file *file, struct address_space *mapping,
2716 loff_t pos, unsigned len, unsigned copied,
2717 struct page *page, void *fsdata)
2718{
2719 struct inode *inode = page->mapping->host;
2720 struct buffer_head *head = fsdata;
2721 struct buffer_head *bh;
2722 BUG_ON(fsdata != NULL && page_has_buffers(page));
2723
2724 if (unlikely(copied < len) && head)
2725 attach_nobh_buffers(page, head);
2726 if (page_has_buffers(page))
2727 return generic_write_end(file, mapping, pos, len,
2728 copied, page, fsdata);
2729
2730 SetPageUptodate(page);
2731 set_page_dirty(page);
2732 if (pos+copied > inode->i_size) {
2733 i_size_write(inode, pos+copied);
2734 mark_inode_dirty(inode);
2735 }
2736
2737 unlock_page(page);
2738 put_page(page);
2739
2740 while (head) {
2741 bh = head;
2742 head = head->b_this_page;
2743 free_buffer_head(bh);
2744 }
2745
2746 return copied;
2747}
2748EXPORT_SYMBOL(nobh_write_end);
2749
2750
2751
2752
2753
2754
2755int nobh_writepage(struct page *page, get_block_t *get_block,
2756 struct writeback_control *wbc)
2757{
2758 struct inode * const inode = page->mapping->host;
2759 loff_t i_size = i_size_read(inode);
2760 const pgoff_t end_index = i_size >> PAGE_SHIFT;
2761 unsigned offset;
2762 int ret;
2763
2764
2765 if (page->index < end_index)
2766 goto out;
2767
2768
2769 offset = i_size & (PAGE_SIZE-1);
2770 if (page->index >= end_index+1 || !offset) {
2771
2772
2773
2774
2775
2776#if 0
2777
2778 if (page->mapping->a_ops->invalidatepage)
2779 page->mapping->a_ops->invalidatepage(page, offset);
2780#endif
2781 unlock_page(page);
2782 return 0;
2783 }
2784
2785
2786
2787
2788
2789
2790
2791
2792 zero_user_segment(page, offset, PAGE_SIZE);
2793out:
2794 ret = mpage_writepage(page, get_block, wbc);
2795 if (ret == -EAGAIN)
2796 ret = __block_write_full_page(inode, page, get_block, wbc,
2797 end_buffer_async_write);
2798 return ret;
2799}
2800EXPORT_SYMBOL(nobh_writepage);
2801
2802int nobh_truncate_page(struct address_space *mapping,
2803 loff_t from, get_block_t *get_block)
2804{
2805 pgoff_t index = from >> PAGE_SHIFT;
2806 unsigned offset = from & (PAGE_SIZE-1);
2807 unsigned blocksize;
2808 sector_t iblock;
2809 unsigned length, pos;
2810 struct inode *inode = mapping->host;
2811 struct page *page;
2812 struct buffer_head map_bh;
2813 int err;
2814
2815 blocksize = i_blocksize(inode);
2816 length = offset & (blocksize - 1);
2817
2818
2819 if (!length)
2820 return 0;
2821
2822 length = blocksize - length;
2823 iblock = (sector_t)index << (PAGE_SHIFT - inode->i_blkbits);
2824
2825 page = grab_cache_page(mapping, index);
2826 err = -ENOMEM;
2827 if (!page)
2828 goto out;
2829
2830 if (page_has_buffers(page)) {
2831has_buffers:
2832 unlock_page(page);
2833 put_page(page);
2834 return block_truncate_page(mapping, from, get_block);
2835 }
2836
2837
2838 pos = blocksize;
2839 while (offset >= pos) {
2840 iblock++;
2841 pos += blocksize;
2842 }
2843
2844 map_bh.b_size = blocksize;
2845 map_bh.b_state = 0;
2846 err = get_block(inode, iblock, &map_bh, 0);
2847 if (err)
2848 goto unlock;
2849
2850 if (!buffer_mapped(&map_bh))
2851 goto unlock;
2852
2853
2854 if (!PageUptodate(page)) {
2855 err = mapping->a_ops->readpage(NULL, page);
2856 if (err) {
2857 put_page(page);
2858 goto out;
2859 }
2860 lock_page(page);
2861 if (!PageUptodate(page)) {
2862 err = -EIO;
2863 goto unlock;
2864 }
2865 if (page_has_buffers(page))
2866 goto has_buffers;
2867 }
2868 zero_user(page, offset, length);
2869 set_page_dirty(page);
2870 err = 0;
2871
2872unlock:
2873 unlock_page(page);
2874 put_page(page);
2875out:
2876 return err;
2877}
2878EXPORT_SYMBOL(nobh_truncate_page);

int block_truncate_page(struct address_space *mapping,
			loff_t from, get_block_t *get_block)
{
	pgoff_t index = from >> PAGE_SHIFT;
	unsigned offset = from & (PAGE_SIZE-1);
	unsigned blocksize;
	sector_t iblock;
	unsigned length, pos;
	struct inode *inode = mapping->host;
	struct page *page;
	struct buffer_head *bh;
	int err;

	blocksize = i_blocksize(inode);
	length = offset & (blocksize - 1);

	/* Block boundary? Nothing to do */
	if (!length)
		return 0;

	length = blocksize - length;
	iblock = (sector_t)index << (PAGE_SHIFT - inode->i_blkbits);

	page = grab_cache_page(mapping, index);
	err = -ENOMEM;
	if (!page)
		goto out;

	if (!page_has_buffers(page))
		create_empty_buffers(page, blocksize, 0);

	/* Find the buffer that contains "offset" */
	bh = page_buffers(page);
	pos = blocksize;
	while (offset >= pos) {
		bh = bh->b_this_page;
		iblock++;
		pos += blocksize;
	}

	err = 0;
	if (!buffer_mapped(bh)) {
		WARN_ON(bh->b_size != blocksize);
		err = get_block(inode, iblock, bh, 0);
		if (err)
			goto unlock;
		/* unmapped? It's a hole - nothing to do */
		if (!buffer_mapped(bh))
			goto unlock;
	}

	/* Ok, it's mapped. Make sure it's up-to-date */
	if (PageUptodate(page))
		set_buffer_uptodate(bh);

	if (!buffer_uptodate(bh) && !buffer_delay(bh) && !buffer_unwritten(bh)) {
		err = -EIO;
		ll_rw_block(REQ_OP_READ, 0, 1, &bh);
		wait_on_buffer(bh);
		/* Uhhuh. Read error. Complain and punt. */
		if (!buffer_uptodate(bh))
			goto unlock;
	}

	zero_user(page, offset, length);
	mark_buffer_dirty(bh);
	err = 0;

unlock:
	unlock_page(page);
	put_page(page);
out:
	return err;
}
EXPORT_SYMBOL(block_truncate_page);
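
/*
 * Example (sketch, hypothetical foofs_get_block): typically called
 * when shrinking a file, so that stale data in the partial tail block
 * is zeroed before i_size is reduced:
 *
 *	err = block_truncate_page(inode->i_mapping, newsize,
 *				  foofs_get_block);
 */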

/*
 * The generic ->writepage function for buffer-backed address_spaces
 */
int block_write_full_page(struct page *page, get_block_t *get_block,
			struct writeback_control *wbc)
{
	struct inode * const inode = page->mapping->host;
	loff_t i_size = i_size_read(inode);
	const pgoff_t end_index = i_size >> PAGE_SHIFT;
	unsigned offset;

	/* Is the page fully inside i_size? */
	if (page->index < end_index)
		return __block_write_full_page(inode, page, get_block, wbc,
					       end_buffer_async_write);

	/* Is the page fully outside i_size? (truncate in progress) */
	offset = i_size & (PAGE_SIZE-1);
	if (page->index >= end_index+1 || !offset) {
		/*
		 * The page may have dirty, unmapped buffers.  For example,
		 * they may have been added in ext3_writepage().  Make them
		 * freeable here, so the page does not leak.
		 */
		do_invalidatepage(page, 0, PAGE_SIZE);
		unlock_page(page);
		return 0;
	}

	/*
	 * The page straddles i_size.  It must be zeroed out on each and every
	 * writepage invocation because it may be mmapped.  "A file is mapped
	 * in multiples of the page size.  For a file that is not a multiple of
	 * the page size, the remaining memory is zeroed when mapped, and
	 * writes to that region are not written out to the file."
	 */
	zero_user_segment(page, offset, PAGE_SIZE);
	return __block_write_full_page(inode, page, get_block, wbc,
				       end_buffer_async_write);
}
EXPORT_SYMBOL(block_write_full_page);
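
/*
 * Example (sketch): most buffer-backed filesystems wrap
 * block_write_full_page() in their ->writepage; foofs_get_block
 * is hypothetical:
 *
 *	static int foofs_writepage(struct page *page,
 *				   struct writeback_control *wbc)
 *	{
 *		return block_write_full_page(page, foofs_get_block, wbc);
 *	}
 */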

sector_t generic_block_bmap(struct address_space *mapping, sector_t block,
			    get_block_t *get_block)
{
	struct inode *inode = mapping->host;
	struct buffer_head tmp = {
		.b_size = i_blocksize(inode),
	};

	get_block(inode, block, &tmp, 0);
	return tmp.b_blocknr;
}
EXPORT_SYMBOL(generic_block_bmap);
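
/*
 * Example (sketch): ->bmap is usually just a thin wrapper around
 * generic_block_bmap() with the filesystem's get_block routine:
 *
 *	static sector_t foofs_bmap(struct address_space *mapping,
 *				   sector_t block)
 *	{
 *		return generic_block_bmap(mapping, block, foofs_get_block);
 *	}
 */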

static void end_bio_bh_io_sync(struct bio *bio)
{
	struct buffer_head *bh = bio->bi_private;

	if (unlikely(bio_flagged(bio, BIO_QUIET)))
		set_bit(BH_Quiet, &bh->b_state);

	bh->b_end_io(bh, !bio->bi_status);
	bio_put(bio);
}

/*
 * This allows us to do IO even on the odd last sectors
 * of a device, even if the block size is some multiple
 * of the physical sector size.
 *
 * We'll just truncate the bio to the size of the device,
 * and clear the end of the buffer head manually.
 *
 * Truncating the bio is done in bio_truncate(), which also
 * zeroes out the truncated part of an up-to-date read so that
 * stale data past the end of the device is never exposed.
 */
void guard_bio_eod(struct bio *bio)
{
	sector_t maxsector;
	struct hd_struct *part;

	rcu_read_lock();
	part = __disk_get_part(bio->bi_disk, bio->bi_partno);
	if (part)
		maxsector = part_nr_sects_read(part);
	else
		maxsector = get_capacity(bio->bi_disk);
	rcu_read_unlock();

	if (!maxsector)
		return;

	/*
	 * If the *whole* IO is past the end of the device,
	 * let it through, and the IO layer will treat it as
	 * an I/O error.
	 */
	if (unlikely(bio->bi_iter.bi_sector >= maxsector))
		return;

	maxsector -= bio->bi_iter.bi_sector;
	if (likely((bio->bi_iter.bi_size >> 9) <= maxsector))
		return;

	bio_truncate(bio, maxsector << 9);
}
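
/*
 * Worked example: with maxsector == 10239 (valid sectors 0..10238), a
 * 4-sector bio starting at sector 10236 reaches one sector past the
 * device.  10236 < 10239, so it is not let through as a whole-IO
 * error; maxsector - bi_sector == 3 while bi_size >> 9 == 4, so the
 * bio is truncated to 3 << 9 == 1536 bytes.
 */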

static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh,
			 enum rw_hint write_hint, struct writeback_control *wbc)
{
	struct bio *bio;

	BUG_ON(!buffer_locked(bh));
	BUG_ON(!buffer_mapped(bh));
	BUG_ON(!bh->b_end_io);
	BUG_ON(buffer_delay(bh));
	BUG_ON(buffer_unwritten(bh));

	/*
	 * Only clear out a write error when rewriting
	 */
	if (test_set_buffer_req(bh) && (op == REQ_OP_WRITE))
		clear_buffer_write_io_error(bh);

	/*
	 * from here on down, it's all bio -- do the initial mapping
	 * into a single-segment bio and submit it
	 */
	bio = bio_alloc(GFP_NOIO, 1);

	bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9);
	bio_set_dev(bio, bh->b_bdev);
	bio->bi_write_hint = write_hint;

	bio_add_page(bio, bh->b_page, bh->b_size, bh_offset(bh));
	BUG_ON(bio->bi_iter.bi_size != bh->b_size);

	bio->bi_end_io = end_bio_bh_io_sync;
	bio->bi_private = bh;

	if (buffer_meta(bh))
		op_flags |= REQ_META;
	if (buffer_prio(bh))
		op_flags |= REQ_PRIO;
	bio_set_op_attrs(bio, op, op_flags);

	/* Take care of bh's that straddle the end of the device */
	guard_bio_eod(bio);

	if (wbc) {
		wbc_init_bio(wbc, bio);
		wbc_account_cgroup_owner(wbc, bh->b_page, bh->b_size);
	}

	submit_bio(bio);
	return 0;
}

int submit_bh(int op, int op_flags, struct buffer_head *bh)
{
	return submit_bh_wbc(op, op_flags, bh, 0, NULL);
}
EXPORT_SYMBOL(submit_bh);

/**
 * ll_rw_block: low-level access to block devices (DEPRECATED)
 * @op: whether to %READ or %WRITE
 * @op_flags: req_flag_bits
 * @nr: number of &struct buffer_heads in the array
 * @bhs: array of pointers to &struct buffer_head
 *
 * ll_rw_block() takes an array of pointers to &struct buffer_heads, and
 * requests an I/O operation on them, either a %REQ_OP_READ or a
 * %REQ_OP_WRITE.  @op_flags contains flags modifying the detailed I/O
 * behavior, most notably %REQ_RAHEAD.
 *
 * This function drops any buffer that it cannot get a lock on (with the
 * BH_Lock state bit), any buffer that appears to be clean when doing a write
 * request, and any buffer that appears to be up-to-date when doing read
 * request.  Further it marks as clean buffers that are processed for
 * writing (the buffer cache won't assume that they are actually clean
 * until the buffer gets unlocked).
 *
 * ll_rw_block sets b_end_io to simple completion handler that marks
 * the buffer up-to-date (if appropriate), unlocks the buffer and wakes
 * any waiters.
 *
 * All of the buffers must be for the same device, and must also be a
 * multiple of the current approved size for the device.
 */
void ll_rw_block(int op, int op_flags, int nr, struct buffer_head *bhs[])
{
	int i;

	for (i = 0; i < nr; i++) {
		struct buffer_head *bh = bhs[i];

		if (!trylock_buffer(bh))
			continue;
		if (op == WRITE) {
			if (test_clear_buffer_dirty(bh)) {
				bh->b_end_io = end_buffer_write_sync;
				get_bh(bh);
				submit_bh(op, op_flags, bh);
				continue;
			}
		} else {
			if (!buffer_uptodate(bh)) {
				bh->b_end_io = end_buffer_read_sync;
				get_bh(bh);
				submit_bh(op, op_flags, bh);
				continue;
			}
		}
		unlock_buffer(bh);
	}
}
EXPORT_SYMBOL(ll_rw_block);
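
/*
 * Example (sketch): opportunistic readahead of a batch of buffers.
 * ll_rw_block() silently skips any buffer it cannot lock, so a caller
 * that actually needs the data must wait and re-check uptodate itself:
 *
 *	ll_rw_block(REQ_OP_READ, REQ_RAHEAD, nr, bhs);
 *	wait_on_buffer(bhs[0]);
 *	if (!buffer_uptodate(bhs[0]))
 *		return -EIO;
 */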

void write_dirty_buffer(struct buffer_head *bh, int op_flags)
{
	lock_buffer(bh);
	if (!test_clear_buffer_dirty(bh)) {
		unlock_buffer(bh);
		return;
	}
	bh->b_end_io = end_buffer_write_sync;
	get_bh(bh);
	submit_bh(REQ_OP_WRITE, op_flags, bh);
}
EXPORT_SYMBOL(write_dirty_buffer);

/*
 * For a data-integrity writeout, we need to wait upon any in-progress I/O
 * and then start new I/O and then wait upon it.  The caller must have a ref
 * on the buffer_head.
 */
int __sync_dirty_buffer(struct buffer_head *bh, int op_flags)
{
	int ret = 0;

	WARN_ON(atomic_read(&bh->b_count) < 1);
	lock_buffer(bh);
	if (test_clear_buffer_dirty(bh)) {
		get_bh(bh);
		bh->b_end_io = end_buffer_write_sync;
		ret = submit_bh(REQ_OP_WRITE, op_flags, bh);
		wait_on_buffer(bh);
		if (!ret && !buffer_uptodate(bh))
			ret = -EIO;
	} else {
		unlock_buffer(bh);
	}
	return ret;
}
EXPORT_SYMBOL(__sync_dirty_buffer);

int sync_dirty_buffer(struct buffer_head *bh)
{
	return __sync_dirty_buffer(bh, REQ_SYNC);
}
EXPORT_SYMBOL(sync_dirty_buffer);
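
/*
 * Example (sketch): forcing a metadata block, such as an updated
 * superblock, to stable storage:
 *
 *	mark_buffer_dirty(sb_bh);
 *	err = sync_dirty_buffer(sb_bh);
 *	if (err)
 *		return err;
 */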

/*
 * try_to_free_buffers() checks if all the buffers on this particular page
 * are unused, and releases them if so.
 *
 * Exclusion against try_to_free_buffers may be obtained by either
 * locking the page or by holding its mapping's private_lock.
 *
 * If the page is dirty but all the buffers are clean then we need to
 * be sure to mark the page clean as well.  This is because the page
 * may be against a block device, and a later reattachment of buffers
 * to a dirty page will set *all* buffers dirty.  Which would corrupt
 * filesystem data on the same device.
 *
 * The same applies to regular filesystem pages: if all the buffers are
 * clean then we set the page clean and proceed.  To do that, we require
 * exclusion from __set_page_dirty_buffers().  That is obtained with
 * private_lock.
 *
 * try_to_free_buffers() is non-blocking.
 */
static inline int buffer_busy(struct buffer_head *bh)
{
	return atomic_read(&bh->b_count) |
		(bh->b_state & ((1 << BH_Dirty) | (1 << BH_Lock)));
}

static int
drop_buffers(struct page *page, struct buffer_head **buffers_to_free)
{
	struct buffer_head *head = page_buffers(page);
	struct buffer_head *bh;

	bh = head;
	do {
		if (buffer_busy(bh))
			goto failed;
		bh = bh->b_this_page;
	} while (bh != head);

	do {
		struct buffer_head *next = bh->b_this_page;

		if (bh->b_assoc_map)
			__remove_assoc_queue(bh);
		bh = next;
	} while (bh != head);
	*buffers_to_free = head;
	__clear_page_buffers(page);
	return 1;
failed:
	return 0;
}

int try_to_free_buffers(struct page *page)
{
	struct address_space * const mapping = page->mapping;
	struct buffer_head *buffers_to_free = NULL;
	int ret = 0;

	BUG_ON(!PageLocked(page));
	if (PageWriteback(page))
		return 0;

	if (mapping == NULL) {
		ret = drop_buffers(page, &buffers_to_free);
		goto out;
	}

	spin_lock(&mapping->private_lock);
	ret = drop_buffers(page, &buffers_to_free);

	/*
	 * If the filesystem writes its buffers by hand (eg ext3)
	 * then we can have clean buffers against a dirty page.  We
	 * clean the page here; otherwise the VM will never notice
	 * that the filesystem did any IO at all.
	 *
	 * Also, during truncate, discard_buffer will have marked all
	 * the page's buffers clean.  We discover that here and clean
	 * the page also.
	 *
	 * private_lock must be held over this entire operation in order
	 * to synchronise against __set_page_dirty_buffers and prevent the
	 * dirty bit from being lost.
	 */
	if (ret)
		cancel_dirty_page(page);
	spin_unlock(&mapping->private_lock);
out:
	if (buffers_to_free) {
		struct buffer_head *bh = buffers_to_free;

		do {
			struct buffer_head *next = bh->b_this_page;
			free_buffer_head(bh);
			bh = next;
		} while (bh != buffers_to_free);
	}
	return ret;
}
EXPORT_SYMBOL(try_to_free_buffers);
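
/*
 * Example (sketch): try_to_free_buffers() is the usual backend for a
 * buffer-backed filesystem's ->releasepage; the gfp mask can be
 * ignored here because the function never allocates:
 *
 *	static int foofs_releasepage(struct page *page, gfp_t gfp)
 *	{
 *		return try_to_free_buffers(page);
 *	}
 */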

/*
 * There are no bdflush tunables left.  But distributions are
 * still running obsolete flush daemons, so we terminate them here.
 *
 * Use of bdflush() is deprecated and will be removed in a
 * future kernel.  Calling it with func == 1 exits the calling
 * daemon; all other values are accepted and ignored.
 */
SYSCALL_DEFINE2(bdflush, int, func, long, data)
{
	static int msg_count;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	if (msg_count < 5) {
		msg_count++;
		printk(KERN_INFO
			"warning: process `%s' used the obsolete bdflush"
			" system call\n", current->comm);
		printk(KERN_INFO "Fix your initscripts?\n");
	}

	if (func == 1)
		do_exit(0);
	return 0;
}

/*
 * Buffer-head allocation
 */
static struct kmem_cache *bh_cachep __read_mostly;

/*
 * Once the number of bh's in the machine exceeds this level, we start
 * stripping them in writeback.
 */
static unsigned long max_buffer_heads;

int buffer_heads_over_limit;

struct bh_accounting {
	int nr;			/* Number of live bh's */
	int ratelimit;		/* Limit cacheline bouncing */
};

static DEFINE_PER_CPU(struct bh_accounting, bh_accounting) = {0, 0};

static void recalc_bh_state(void)
{
	int i;
	int tot = 0;

	if (__this_cpu_inc_return(bh_accounting.ratelimit) - 1 < 4096)
		return;
	__this_cpu_write(bh_accounting.ratelimit, 0);
	for_each_online_cpu(i)
		tot += per_cpu(bh_accounting, i).nr;
	buffer_heads_over_limit = (tot > max_buffer_heads);
}

struct buffer_head *alloc_buffer_head(gfp_t gfp_flags)
{
	struct buffer_head *ret = kmem_cache_zalloc(bh_cachep, gfp_flags);
	if (ret) {
		INIT_LIST_HEAD(&ret->b_assoc_buffers);
		preempt_disable();
		__this_cpu_inc(bh_accounting.nr);
		recalc_bh_state();
		preempt_enable();
	}
	return ret;
}
EXPORT_SYMBOL(alloc_buffer_head);

void free_buffer_head(struct buffer_head *bh)
{
	BUG_ON(!list_empty(&bh->b_assoc_buffers));
	kmem_cache_free(bh_cachep, bh);
	preempt_disable();
	__this_cpu_dec(bh_accounting.nr);
	recalc_bh_state();
	preempt_enable();
}
EXPORT_SYMBOL(free_buffer_head);

static int buffer_exit_cpu_dead(unsigned int cpu)
{
	int i;
	struct bh_lru *b = &per_cpu(bh_lrus, cpu);

	for (i = 0; i < BH_LRU_SIZE; i++) {
		brelse(b->bhs[i]);
		b->bhs[i] = NULL;
	}
	this_cpu_add(bh_accounting.nr, per_cpu(bh_accounting, cpu).nr);
	per_cpu(bh_accounting, cpu).nr = 0;
	return 0;
}

/**
 * bh_uptodate_or_lock - Test whether the buffer is uptodate
 * @bh: struct buffer_head
 *
 * Return true if the buffer is up-to-date and false,
 * with the buffer locked, if not.
 */
int bh_uptodate_or_lock(struct buffer_head *bh)
{
	if (!buffer_uptodate(bh)) {
		lock_buffer(bh);
		if (!buffer_uptodate(bh))
			return 0;
		unlock_buffer(bh);
	}
	return 1;
}
EXPORT_SYMBOL(bh_uptodate_or_lock);

/**
 * bh_submit_read - Submit a locked buffer for reading
 * @bh: struct buffer_head
 *
 * Returns zero on success and -EIO on error.
 */
int bh_submit_read(struct buffer_head *bh)
{
	BUG_ON(!buffer_locked(bh));

	if (buffer_uptodate(bh)) {
		unlock_buffer(bh);
		return 0;
	}

	get_bh(bh);
	bh->b_end_io = end_buffer_read_sync;
	submit_bh(REQ_OP_READ, 0, bh);
	wait_on_buffer(bh);
	if (buffer_uptodate(bh))
		return 0;
	return -EIO;
}
EXPORT_SYMBOL(bh_submit_read);
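
/*
 * Example (sketch): the two helpers above combine into the common
 * "read it unless it is already cached" pattern:
 *
 *	if (!bh_uptodate_or_lock(bh)) {
 *		err = bh_submit_read(bh);
 *		if (err)
 *			return err;
 *	}
 */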

void __init buffer_init(void)
{
	unsigned long nrpages;
	int ret;

	bh_cachep = kmem_cache_create("buffer_head",
			sizeof(struct buffer_head), 0,
				(SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|
				SLAB_MEM_SPREAD),
				NULL);

	/*
	 * Limit the bh occupancy to 10% of ZONE_NORMAL
	 */
	nrpages = (nr_free_buffer_pages() * 10) / 100;
	max_buffer_heads = nrpages * (PAGE_SIZE / sizeof(struct buffer_head));
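
	/*
	 * Worked example for the sizing above (assuming 4KiB pages and
	 * a buffer_head of roughly a hundred bytes, i.e. ~40 per page):
	 * 1M free buffer pages yield nrpages = 100k, so roughly 4M
	 * buffer heads before buffer_heads_over_limit trips.
	 */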

	ret = cpuhp_setup_state_nocalls(CPUHP_FS_BUFF_DEAD, "fs/buffer:dead",
					NULL, buffer_exit_cpu_dead);
	WARN_ON(ret < 0);
}