1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22#include <linux/kernel.h>
23#include <linux/sched/signal.h>
24#include <linux/syscalls.h>
25#include <linux/fs.h>
26#include <linux/iomap.h>
27#include <linux/mm.h>
28#include <linux/percpu.h>
29#include <linux/slab.h>
30#include <linux/capability.h>
31#include <linux/blkdev.h>
32#include <linux/file.h>
33#include <linux/quotaops.h>
34#include <linux/highmem.h>
35#include <linux/export.h>
36#include <linux/backing-dev.h>
37#include <linux/writeback.h>
38#include <linux/hash.h>
39#include <linux/suspend.h>
40#include <linux/buffer_head.h>
41#include <linux/task_io_accounting_ops.h>
42#include <linux/bio.h>
43#include <linux/cpu.h>
44#include <linux/bitops.h>
45#include <linux/mpage.h>
46#include <linux/bit_spinlock.h>
47#include <linux/pagevec.h>
48#include <linux/sched/mm.h>
49#include <trace/events/block.h>
50#include <linux/fscrypt.h>
51
52#include "internal.h"
53
54static int fsync_buffers_list(spinlock_t *lock, struct list_head *list);
55static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh,
56 enum rw_hint hint, struct writeback_control *wbc);
57
58#define BH_ENTRY(list) list_entry((list), struct buffer_head, b_assoc_buffers)
59
60inline void touch_buffer(struct buffer_head *bh)
61{
62 trace_block_touch_buffer(bh);
63 mark_page_accessed(bh->b_page);
64}
65EXPORT_SYMBOL(touch_buffer);
66
67void __lock_buffer(struct buffer_head *bh)
68{
69 wait_on_bit_lock_io(&bh->b_state, BH_Lock, TASK_UNINTERRUPTIBLE);
70}
71EXPORT_SYMBOL(__lock_buffer);
72
73void unlock_buffer(struct buffer_head *bh)
74{
75 clear_bit_unlock(BH_Lock, &bh->b_state);
76 smp_mb__after_atomic();
77 wake_up_bit(&bh->b_state, BH_Lock);
78}
79EXPORT_SYMBOL(unlock_buffer);
80
81
82
83
84
85
86void buffer_check_dirty_writeback(struct page *page,
87 bool *dirty, bool *writeback)
88{
89 struct buffer_head *head, *bh;
90 *dirty = false;
91 *writeback = false;
92
93 BUG_ON(!PageLocked(page));
94
95 if (!page_has_buffers(page))
96 return;
97
98 if (PageWriteback(page))
99 *writeback = true;
100
101 head = page_buffers(page);
102 bh = head;
103 do {
104 if (buffer_locked(bh))
105 *writeback = true;
106
107 if (buffer_dirty(bh))
108 *dirty = true;
109
110 bh = bh->b_this_page;
111 } while (bh != head);
112}
113EXPORT_SYMBOL(buffer_check_dirty_writeback);
114
115
116
117
118
119
120void __wait_on_buffer(struct buffer_head * bh)
121{
122 wait_on_bit_io(&bh->b_state, BH_Lock, TASK_UNINTERRUPTIBLE);
123}
124EXPORT_SYMBOL(__wait_on_buffer);
125
126static void buffer_io_error(struct buffer_head *bh, char *msg)
127{
128 if (!test_bit(BH_Quiet, &bh->b_state))
129 printk_ratelimited(KERN_ERR
130 "Buffer I/O error on dev %pg, logical block %llu%s\n",
131 bh->b_bdev, (unsigned long long)bh->b_blocknr, msg);
132}
133
134
135
136
137
138
139
140
141
/*
 * Read-completion helper: set or clear the uptodate bit of @bh according
 * to @uptodate, then unlock the buffer. The buffer reference count is not
 * touched here.
 */
static void __end_buffer_read_notouch(struct buffer_head *bh, int uptodate)
{
	if (!uptodate)
		clear_buffer_uptodate(bh);
	else
		set_buffer_uptodate(bh);

	unlock_buffer(bh);
}
152
153
154
155
156
/*
 * end_buffer_read_sync - completion handler for a synchronous buffer read
 * @bh:       buffer whose read finished
 * @uptodate: non-zero if the read succeeded
 *
 * Updates the uptodate bit, unlocks the buffer and then drops one
 * reference on it.
 */
void end_buffer_read_sync(struct buffer_head *bh, int uptodate)
{
	__end_buffer_read_notouch(bh, uptodate);

	/* Release one reference now that the I/O has completed. */
	put_bh(bh);
}
EXPORT_SYMBOL(end_buffer_read_sync);
163
/*
 * end_buffer_write_sync - completion handler for a synchronous buffer write
 * @bh:       buffer whose write finished
 * @uptodate: non-zero if the write succeeded
 *
 * On failure the error is logged, recorded through
 * mark_buffer_write_io_error() and the uptodate bit is cleared. In both
 * cases the buffer is unlocked and one reference is dropped.
 */
void end_buffer_write_sync(struct buffer_head *bh, int uptodate)
{
	if (!uptodate) {
		buffer_io_error(bh, ", lost sync page write");
		mark_buffer_write_io_error(bh);
		clear_buffer_uptodate(bh);
	} else {
		set_buffer_uptodate(bh);
	}

	unlock_buffer(bh);
	put_bh(bh);
}
EXPORT_SYMBOL(end_buffer_write_sync);
177
178
179
180
181
182
183
184
185
186
187
188static struct buffer_head *
189__find_get_block_slow(struct block_device *bdev, sector_t block)
190{
191 struct inode *bd_inode = bdev->bd_inode;
192 struct address_space *bd_mapping = bd_inode->i_mapping;
193 struct buffer_head *ret = NULL;
194 pgoff_t index;
195 struct buffer_head *bh;
196 struct buffer_head *head;
197 struct page *page;
198 int all_mapped = 1;
199 static DEFINE_RATELIMIT_STATE(last_warned, HZ, 1);
200
201 index = block >> (PAGE_SHIFT - bd_inode->i_blkbits);
202 page = find_get_page_flags(bd_mapping, index, FGP_ACCESSED);
203 if (!page)
204 goto out;
205
206 spin_lock(&bd_mapping->private_lock);
207 if (!page_has_buffers(page))
208 goto out_unlock;
209 head = page_buffers(page);
210 bh = head;
211 do {
212 if (!buffer_mapped(bh))
213 all_mapped = 0;
214 else if (bh->b_blocknr == block) {
215 ret = bh;
216 get_bh(bh);
217 goto out_unlock;
218 }
219 bh = bh->b_this_page;
220 } while (bh != head);
221
222
223
224
225
226
227 ratelimit_set_flags(&last_warned, RATELIMIT_MSG_ON_RELEASE);
228 if (all_mapped && __ratelimit(&last_warned)) {
229 printk("__find_get_block_slow() failed. block=%llu, "
230 "b_blocknr=%llu, b_state=0x%08lx, b_size=%zu, "
231 "device %pg blocksize: %d\n",
232 (unsigned long long)block,
233 (unsigned long long)bh->b_blocknr,
234 bh->b_state, bh->b_size, bdev,
235 1 << bd_inode->i_blkbits);
236 }
237out_unlock:
238 spin_unlock(&bd_mapping->private_lock);
239 put_page(page);
240out:
241 return ret;
242}
243
244static void end_buffer_async_read(struct buffer_head *bh, int uptodate)
245{
246 unsigned long flags;
247 struct buffer_head *first;
248 struct buffer_head *tmp;
249 struct page *page;
250 int page_uptodate = 1;
251
252 BUG_ON(!buffer_async_read(bh));
253
254 page = bh->b_page;
255 if (uptodate) {
256 set_buffer_uptodate(bh);
257 } else {
258 clear_buffer_uptodate(bh);
259 buffer_io_error(bh, ", async page read");
260 SetPageError(page);
261 }
262
263
264
265
266
267
268 first = page_buffers(page);
269 spin_lock_irqsave(&first->b_uptodate_lock, flags);
270 clear_buffer_async_read(bh);
271 unlock_buffer(bh);
272 tmp = bh;
273 do {
274 if (!buffer_uptodate(tmp))
275 page_uptodate = 0;
276 if (buffer_async_read(tmp)) {
277 BUG_ON(!buffer_locked(tmp));
278 goto still_busy;
279 }
280 tmp = tmp->b_this_page;
281 } while (tmp != bh);
282 spin_unlock_irqrestore(&first->b_uptodate_lock, flags);
283
284
285
286
287
288 if (page_uptodate && !PageError(page))
289 SetPageUptodate(page);
290 unlock_page(page);
291 return;
292
293still_busy:
294 spin_unlock_irqrestore(&first->b_uptodate_lock, flags);
295 return;
296}
297
298struct decrypt_bh_ctx {
299 struct work_struct work;
300 struct buffer_head *bh;
301};
302
303static void decrypt_bh(struct work_struct *work)
304{
305 struct decrypt_bh_ctx *ctx =
306 container_of(work, struct decrypt_bh_ctx, work);
307 struct buffer_head *bh = ctx->bh;
308 int err;
309
310 err = fscrypt_decrypt_pagecache_blocks(bh->b_page, bh->b_size,
311 bh_offset(bh));
312 end_buffer_async_read(bh, err == 0);
313 kfree(ctx);
314}
315
316
317
318
319
320static void end_buffer_async_read_io(struct buffer_head *bh, int uptodate)
321{
322
323 if (uptodate &&
324 fscrypt_inode_uses_fs_layer_crypto(bh->b_page->mapping->host)) {
325 struct decrypt_bh_ctx *ctx = kmalloc(sizeof(*ctx), GFP_ATOMIC);
326
327 if (ctx) {
328 INIT_WORK(&ctx->work, decrypt_bh);
329 ctx->bh = bh;
330 fscrypt_enqueue_decrypt_work(&ctx->work);
331 return;
332 }
333 uptodate = 0;
334 }
335 end_buffer_async_read(bh, uptodate);
336}
337
338
339
340
341
342void end_buffer_async_write(struct buffer_head *bh, int uptodate)
343{
344 unsigned long flags;
345 struct buffer_head *first;
346 struct buffer_head *tmp;
347 struct page *page;
348
349 BUG_ON(!buffer_async_write(bh));
350
351 page = bh->b_page;
352 if (uptodate) {
353 set_buffer_uptodate(bh);
354 } else {
355 buffer_io_error(bh, ", lost async page write");
356 mark_buffer_write_io_error(bh);
357 clear_buffer_uptodate(bh);
358 SetPageError(page);
359 }
360
361 first = page_buffers(page);
362 spin_lock_irqsave(&first->b_uptodate_lock, flags);
363
364 clear_buffer_async_write(bh);
365 unlock_buffer(bh);
366 tmp = bh->b_this_page;
367 while (tmp != bh) {
368 if (buffer_async_write(tmp)) {
369 BUG_ON(!buffer_locked(tmp));
370 goto still_busy;
371 }
372 tmp = tmp->b_this_page;
373 }
374 spin_unlock_irqrestore(&first->b_uptodate_lock, flags);
375 end_page_writeback(page);
376 return;
377
378still_busy:
379 spin_unlock_irqrestore(&first->b_uptodate_lock, flags);
380 return;
381}
382EXPORT_SYMBOL(end_buffer_async_write);
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405static void mark_buffer_async_read(struct buffer_head *bh)
406{
407 bh->b_end_io = end_buffer_async_read_io;
408 set_buffer_async_read(bh);
409}
410
411static void mark_buffer_async_write_endio(struct buffer_head *bh,
412 bh_end_io_t *handler)
413{
414 bh->b_end_io = handler;
415 set_buffer_async_write(bh);
416}
417
418void mark_buffer_async_write(struct buffer_head *bh)
419{
420 mark_buffer_async_write_endio(bh, end_buffer_async_write);
421}
422EXPORT_SYMBOL(mark_buffer_async_write);
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477static void __remove_assoc_queue(struct buffer_head *bh)
478{
479 list_del_init(&bh->b_assoc_buffers);
480 WARN_ON(!bh->b_assoc_map);
481 bh->b_assoc_map = NULL;
482}
483
484int inode_has_buffers(struct inode *inode)
485{
486 return !list_empty(&inode->i_data.private_list);
487}
488
489
490
491
492
493
494
495
496
497
498
499static int osync_buffers_list(spinlock_t *lock, struct list_head *list)
500{
501 struct buffer_head *bh;
502 struct list_head *p;
503 int err = 0;
504
505 spin_lock(lock);
506repeat:
507 list_for_each_prev(p, list) {
508 bh = BH_ENTRY(p);
509 if (buffer_locked(bh)) {
510 get_bh(bh);
511 spin_unlock(lock);
512 wait_on_buffer(bh);
513 if (!buffer_uptodate(bh))
514 err = -EIO;
515 brelse(bh);
516 spin_lock(lock);
517 goto repeat;
518 }
519 }
520 spin_unlock(lock);
521 return err;
522}
523
524void emergency_thaw_bdev(struct super_block *sb)
525{
526 while (sb->s_bdev && !thaw_bdev(sb->s_bdev, sb))
527 printk(KERN_WARNING "Emergency Thaw on %pg\n", sb->s_bdev);
528}
529
530
531
532
533
534
535
536
537
538
539
540
541int sync_mapping_buffers(struct address_space *mapping)
542{
543 struct address_space *buffer_mapping = mapping->private_data;
544
545 if (buffer_mapping == NULL || list_empty(&mapping->private_list))
546 return 0;
547
548 return fsync_buffers_list(&buffer_mapping->private_lock,
549 &mapping->private_list);
550}
551EXPORT_SYMBOL(sync_mapping_buffers);
552
553
554
555
556
557
558
559void write_boundary_block(struct block_device *bdev,
560 sector_t bblock, unsigned blocksize)
561{
562 struct buffer_head *bh = __find_get_block(bdev, bblock + 1, blocksize);
563 if (bh) {
564 if (buffer_dirty(bh))
565 ll_rw_block(REQ_OP_WRITE, 0, 1, &bh);
566 put_bh(bh);
567 }
568}
569
570void mark_buffer_dirty_inode(struct buffer_head *bh, struct inode *inode)
571{
572 struct address_space *mapping = inode->i_mapping;
573 struct address_space *buffer_mapping = bh->b_page->mapping;
574
575 mark_buffer_dirty(bh);
576 if (!mapping->private_data) {
577 mapping->private_data = buffer_mapping;
578 } else {
579 BUG_ON(mapping->private_data != buffer_mapping);
580 }
581 if (!bh->b_assoc_map) {
582 spin_lock(&buffer_mapping->private_lock);
583 list_move_tail(&bh->b_assoc_buffers,
584 &mapping->private_list);
585 bh->b_assoc_map = mapping;
586 spin_unlock(&buffer_mapping->private_lock);
587 }
588}
589EXPORT_SYMBOL(mark_buffer_dirty_inode);
590
591
592
593
594
595
596
597
598
599
600void __set_page_dirty(struct page *page, struct address_space *mapping,
601 int warn)
602{
603 unsigned long flags;
604
605 xa_lock_irqsave(&mapping->i_pages, flags);
606 if (page->mapping) {
607 WARN_ON_ONCE(warn && !PageUptodate(page));
608 account_page_dirtied(page, mapping);
609 __xa_set_mark(&mapping->i_pages, page_index(page),
610 PAGECACHE_TAG_DIRTY);
611 }
612 xa_unlock_irqrestore(&mapping->i_pages, flags);
613}
614EXPORT_SYMBOL_GPL(__set_page_dirty);
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641int __set_page_dirty_buffers(struct page *page)
642{
643 int newly_dirty;
644 struct address_space *mapping = page_mapping(page);
645
646 if (unlikely(!mapping))
647 return !TestSetPageDirty(page);
648
649 spin_lock(&mapping->private_lock);
650 if (page_has_buffers(page)) {
651 struct buffer_head *head = page_buffers(page);
652 struct buffer_head *bh = head;
653
654 do {
655 set_buffer_dirty(bh);
656 bh = bh->b_this_page;
657 } while (bh != head);
658 }
659
660
661
662
663 lock_page_memcg(page);
664 newly_dirty = !TestSetPageDirty(page);
665 spin_unlock(&mapping->private_lock);
666
667 if (newly_dirty)
668 __set_page_dirty(page, mapping, 1);
669
670 unlock_page_memcg(page);
671
672 if (newly_dirty)
673 __mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
674
675 return newly_dirty;
676}
677EXPORT_SYMBOL(__set_page_dirty_buffers);
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698static int fsync_buffers_list(spinlock_t *lock, struct list_head *list)
699{
700 struct buffer_head *bh;
701 struct list_head tmp;
702 struct address_space *mapping;
703 int err = 0, err2;
704 struct blk_plug plug;
705
706 INIT_LIST_HEAD(&tmp);
707 blk_start_plug(&plug);
708
709 spin_lock(lock);
710 while (!list_empty(list)) {
711 bh = BH_ENTRY(list->next);
712 mapping = bh->b_assoc_map;
713 __remove_assoc_queue(bh);
714
715
716 smp_mb();
717 if (buffer_dirty(bh) || buffer_locked(bh)) {
718 list_add(&bh->b_assoc_buffers, &tmp);
719 bh->b_assoc_map = mapping;
720 if (buffer_dirty(bh)) {
721 get_bh(bh);
722 spin_unlock(lock);
723
724
725
726
727
728
729
730 write_dirty_buffer(bh, REQ_SYNC);
731
732
733
734
735
736
737
738 brelse(bh);
739 spin_lock(lock);
740 }
741 }
742 }
743
744 spin_unlock(lock);
745 blk_finish_plug(&plug);
746 spin_lock(lock);
747
748 while (!list_empty(&tmp)) {
749 bh = BH_ENTRY(tmp.prev);
750 get_bh(bh);
751 mapping = bh->b_assoc_map;
752 __remove_assoc_queue(bh);
753
754
755 smp_mb();
756 if (buffer_dirty(bh)) {
757 list_add(&bh->b_assoc_buffers,
758 &mapping->private_list);
759 bh->b_assoc_map = mapping;
760 }
761 spin_unlock(lock);
762 wait_on_buffer(bh);
763 if (!buffer_uptodate(bh))
764 err = -EIO;
765 brelse(bh);
766 spin_lock(lock);
767 }
768
769 spin_unlock(lock);
770 err2 = osync_buffers_list(lock, list);
771 if (err)
772 return err;
773 else
774 return err2;
775}
776
777
778
779
780
781
782
783
784
785
786void invalidate_inode_buffers(struct inode *inode)
787{
788 if (inode_has_buffers(inode)) {
789 struct address_space *mapping = &inode->i_data;
790 struct list_head *list = &mapping->private_list;
791 struct address_space *buffer_mapping = mapping->private_data;
792
793 spin_lock(&buffer_mapping->private_lock);
794 while (!list_empty(list))
795 __remove_assoc_queue(BH_ENTRY(list->next));
796 spin_unlock(&buffer_mapping->private_lock);
797 }
798}
799EXPORT_SYMBOL(invalidate_inode_buffers);
800
801
802
803
804
805
806
807int remove_inode_buffers(struct inode *inode)
808{
809 int ret = 1;
810
811 if (inode_has_buffers(inode)) {
812 struct address_space *mapping = &inode->i_data;
813 struct list_head *list = &mapping->private_list;
814 struct address_space *buffer_mapping = mapping->private_data;
815
816 spin_lock(&buffer_mapping->private_lock);
817 while (!list_empty(list)) {
818 struct buffer_head *bh = BH_ENTRY(list->next);
819 if (buffer_dirty(bh)) {
820 ret = 0;
821 break;
822 }
823 __remove_assoc_queue(bh);
824 }
825 spin_unlock(&buffer_mapping->private_lock);
826 }
827 return ret;
828}
829
830
831
832
833
834
835
836
837
838
839struct buffer_head *alloc_page_buffers(struct page *page, unsigned long size,
840 bool retry)
841{
842 struct buffer_head *bh, *head;
843 gfp_t gfp = GFP_NOFS | __GFP_ACCOUNT;
844 long offset;
845 struct mem_cgroup *memcg;
846
847 if (retry)
848 gfp |= __GFP_NOFAIL;
849
850 memcg = get_mem_cgroup_from_page(page);
851 memalloc_use_memcg(memcg);
852
853 head = NULL;
854 offset = PAGE_SIZE;
855 while ((offset -= size) >= 0) {
856 bh = alloc_buffer_head(gfp);
857 if (!bh)
858 goto no_grow;
859
860 bh->b_this_page = head;
861 bh->b_blocknr = -1;
862 head = bh;
863
864 bh->b_size = size;
865
866
867 set_bh_page(bh, page, offset);
868 }
869out:
870 memalloc_unuse_memcg();
871 mem_cgroup_put(memcg);
872 return head;
873
874
875
876no_grow:
877 if (head) {
878 do {
879 bh = head;
880 head = head->b_this_page;
881 free_buffer_head(bh);
882 } while (head);
883 }
884
885 goto out;
886}
887EXPORT_SYMBOL_GPL(alloc_page_buffers);
888
889static inline void
890link_dev_buffers(struct page *page, struct buffer_head *head)
891{
892 struct buffer_head *bh, *tail;
893
894 bh = head;
895 do {
896 tail = bh;
897 bh = bh->b_this_page;
898 } while (bh);
899 tail->b_this_page = head;
900 attach_page_private(page, head);
901}
902
903static sector_t blkdev_max_block(struct block_device *bdev, unsigned int size)
904{
905 sector_t retval = ~((sector_t)0);
906 loff_t sz = i_size_read(bdev->bd_inode);
907
908 if (sz) {
909 unsigned int sizebits = blksize_bits(size);
910 retval = (sz >> sizebits);
911 }
912 return retval;
913}
914
915
916
917
918static sector_t
919init_page_buffers(struct page *page, struct block_device *bdev,
920 sector_t block, int size)
921{
922 struct buffer_head *head = page_buffers(page);
923 struct buffer_head *bh = head;
924 int uptodate = PageUptodate(page);
925 sector_t end_block = blkdev_max_block(I_BDEV(bdev->bd_inode), size);
926
927 do {
928 if (!buffer_mapped(bh)) {
929 bh->b_end_io = NULL;
930 bh->b_private = NULL;
931 bh->b_bdev = bdev;
932 bh->b_blocknr = block;
933 if (uptodate)
934 set_buffer_uptodate(bh);
935 if (block < end_block)
936 set_buffer_mapped(bh);
937 }
938 block++;
939 bh = bh->b_this_page;
940 } while (bh != head);
941
942
943
944
945 return end_block;
946}
947
948
949
950
951
952
953static int
954grow_dev_page(struct block_device *bdev, sector_t block,
955 pgoff_t index, int size, int sizebits, gfp_t gfp)
956{
957 struct inode *inode = bdev->bd_inode;
958 struct page *page;
959 struct buffer_head *bh;
960 sector_t end_block;
961 int ret = 0;
962 gfp_t gfp_mask;
963
964 gfp_mask = mapping_gfp_constraint(inode->i_mapping, ~__GFP_FS) | gfp;
965
966
967
968
969
970
971
972 gfp_mask |= __GFP_NOFAIL;
973
974 page = find_or_create_page(inode->i_mapping, index, gfp_mask);
975
976 BUG_ON(!PageLocked(page));
977
978 if (page_has_buffers(page)) {
979 bh = page_buffers(page);
980 if (bh->b_size == size) {
981 end_block = init_page_buffers(page, bdev,
982 (sector_t)index << sizebits,
983 size);
984 goto done;
985 }
986 if (!try_to_free_buffers(page))
987 goto failed;
988 }
989
990
991
992
993 bh = alloc_page_buffers(page, size, true);
994
995
996
997
998
999
1000 spin_lock(&inode->i_mapping->private_lock);
1001 link_dev_buffers(page, bh);
1002 end_block = init_page_buffers(page, bdev, (sector_t)index << sizebits,
1003 size);
1004 spin_unlock(&inode->i_mapping->private_lock);
1005done:
1006 ret = (block < end_block) ? 1 : -ENXIO;
1007failed:
1008 unlock_page(page);
1009 put_page(page);
1010 return ret;
1011}
1012
1013
1014
1015
1016
1017static int
1018grow_buffers(struct block_device *bdev, sector_t block, int size, gfp_t gfp)
1019{
1020 pgoff_t index;
1021 int sizebits;
1022
1023 sizebits = -1;
1024 do {
1025 sizebits++;
1026 } while ((size << sizebits) < PAGE_SIZE);
1027
1028 index = block >> sizebits;
1029
1030
1031
1032
1033
1034 if (unlikely(index != block >> sizebits)) {
1035 printk(KERN_ERR "%s: requested out-of-range block %llu for "
1036 "device %pg\n",
1037 __func__, (unsigned long long)block,
1038 bdev);
1039 return -EIO;
1040 }
1041
1042
1043 return grow_dev_page(bdev, block, index, size, sizebits, gfp);
1044}
1045
1046static struct buffer_head *
1047__getblk_slow(struct block_device *bdev, sector_t block,
1048 unsigned size, gfp_t gfp)
1049{
1050
1051 if (unlikely(size & (bdev_logical_block_size(bdev)-1) ||
1052 (size < 512 || size > PAGE_SIZE))) {
1053 printk(KERN_ERR "getblk(): invalid block size %d requested\n",
1054 size);
1055 printk(KERN_ERR "logical block size: %d\n",
1056 bdev_logical_block_size(bdev));
1057
1058 dump_stack();
1059 return NULL;
1060 }
1061
1062 for (;;) {
1063 struct buffer_head *bh;
1064 int ret;
1065
1066 bh = __find_get_block(bdev, block, size);
1067 if (bh)
1068 return bh;
1069
1070 ret = grow_buffers(bdev, block, size, gfp);
1071 if (ret < 0)
1072 return NULL;
1073 }
1074}
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111void mark_buffer_dirty(struct buffer_head *bh)
1112{
1113 WARN_ON_ONCE(!buffer_uptodate(bh));
1114
1115 trace_block_dirty_buffer(bh);
1116
1117
1118
1119
1120
1121
1122
1123 if (buffer_dirty(bh)) {
1124 smp_mb();
1125 if (buffer_dirty(bh))
1126 return;
1127 }
1128
1129 if (!test_set_buffer_dirty(bh)) {
1130 struct page *page = bh->b_page;
1131 struct address_space *mapping = NULL;
1132
1133 lock_page_memcg(page);
1134 if (!TestSetPageDirty(page)) {
1135 mapping = page_mapping(page);
1136 if (mapping)
1137 __set_page_dirty(page, mapping, 0);
1138 }
1139 unlock_page_memcg(page);
1140 if (mapping)
1141 __mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
1142 }
1143}
1144EXPORT_SYMBOL(mark_buffer_dirty);
1145
1146void mark_buffer_write_io_error(struct buffer_head *bh)
1147{
1148 struct super_block *sb;
1149
1150 set_buffer_write_io_error(bh);
1151
1152 if (bh->b_page && bh->b_page->mapping)
1153 mapping_set_error(bh->b_page->mapping, -EIO);
1154 if (bh->b_assoc_map)
1155 mapping_set_error(bh->b_assoc_map, -EIO);
1156 rcu_read_lock();
1157 sb = READ_ONCE(bh->b_bdev->bd_super);
1158 if (sb)
1159 errseq_set(&sb->s_wb_err, -EIO);
1160 rcu_read_unlock();
1161}
1162EXPORT_SYMBOL(mark_buffer_write_io_error);
1163
1164
1165
1166
1167
1168
1169
1170
1171void __brelse(struct buffer_head * buf)
1172{
1173 if (atomic_read(&buf->b_count)) {
1174 put_bh(buf);
1175 return;
1176 }
1177 WARN(1, KERN_ERR "VFS: brelse: Trying to free free buffer\n");
1178}
1179EXPORT_SYMBOL(__brelse);
1180
1181
1182
1183
1184
1185void __bforget(struct buffer_head *bh)
1186{
1187 clear_buffer_dirty(bh);
1188 if (bh->b_assoc_map) {
1189 struct address_space *buffer_mapping = bh->b_page->mapping;
1190
1191 spin_lock(&buffer_mapping->private_lock);
1192 list_del_init(&bh->b_assoc_buffers);
1193 bh->b_assoc_map = NULL;
1194 spin_unlock(&buffer_mapping->private_lock);
1195 }
1196 __brelse(bh);
1197}
1198EXPORT_SYMBOL(__bforget);
1199
1200static struct buffer_head *__bread_slow(struct buffer_head *bh)
1201{
1202 lock_buffer(bh);
1203 if (buffer_uptodate(bh)) {
1204 unlock_buffer(bh);
1205 return bh;
1206 } else {
1207 get_bh(bh);
1208 bh->b_end_io = end_buffer_read_sync;
1209 submit_bh(REQ_OP_READ, 0, bh);
1210 wait_on_buffer(bh);
1211 if (buffer_uptodate(bh))
1212 return bh;
1213 }
1214 brelse(bh);
1215 return NULL;
1216}
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232#define BH_LRU_SIZE 16
1233
1234struct bh_lru {
1235 struct buffer_head *bhs[BH_LRU_SIZE];
1236};
1237
1238static DEFINE_PER_CPU(struct bh_lru, bh_lrus) = {{ NULL }};
1239
1240#ifdef CONFIG_SMP
1241#define bh_lru_lock() local_irq_disable()
1242#define bh_lru_unlock() local_irq_enable()
1243#else
1244#define bh_lru_lock() preempt_disable()
1245#define bh_lru_unlock() preempt_enable()
1246#endif
1247
1248static inline void check_irqs_on(void)
1249{
1250#ifdef irqs_disabled
1251 BUG_ON(irqs_disabled());
1252#endif
1253}
1254
1255
1256
1257
1258
1259
1260static void bh_lru_install(struct buffer_head *bh)
1261{
1262 struct buffer_head *evictee = bh;
1263 struct bh_lru *b;
1264 int i;
1265
1266 check_irqs_on();
1267 bh_lru_lock();
1268
1269 b = this_cpu_ptr(&bh_lrus);
1270 for (i = 0; i < BH_LRU_SIZE; i++) {
1271 swap(evictee, b->bhs[i]);
1272 if (evictee == bh) {
1273 bh_lru_unlock();
1274 return;
1275 }
1276 }
1277
1278 get_bh(bh);
1279 bh_lru_unlock();
1280 brelse(evictee);
1281}
1282
1283
1284
1285
1286static struct buffer_head *
1287lookup_bh_lru(struct block_device *bdev, sector_t block, unsigned size)
1288{
1289 struct buffer_head *ret = NULL;
1290 unsigned int i;
1291
1292 check_irqs_on();
1293 bh_lru_lock();
1294 for (i = 0; i < BH_LRU_SIZE; i++) {
1295 struct buffer_head *bh = __this_cpu_read(bh_lrus.bhs[i]);
1296
1297 if (bh && bh->b_blocknr == block && bh->b_bdev == bdev &&
1298 bh->b_size == size) {
1299 if (i) {
1300 while (i) {
1301 __this_cpu_write(bh_lrus.bhs[i],
1302 __this_cpu_read(bh_lrus.bhs[i - 1]));
1303 i--;
1304 }
1305 __this_cpu_write(bh_lrus.bhs[0], bh);
1306 }
1307 get_bh(bh);
1308 ret = bh;
1309 break;
1310 }
1311 }
1312 bh_lru_unlock();
1313 return ret;
1314}
1315
1316
1317
1318
1319
1320
1321struct buffer_head *
1322__find_get_block(struct block_device *bdev, sector_t block, unsigned size)
1323{
1324 struct buffer_head *bh = lookup_bh_lru(bdev, block, size);
1325
1326 if (bh == NULL) {
1327
1328 bh = __find_get_block_slow(bdev, block);
1329 if (bh)
1330 bh_lru_install(bh);
1331 } else
1332 touch_buffer(bh);
1333
1334 return bh;
1335}
1336EXPORT_SYMBOL(__find_get_block);
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346struct buffer_head *
1347__getblk_gfp(struct block_device *bdev, sector_t block,
1348 unsigned size, gfp_t gfp)
1349{
1350 struct buffer_head *bh = __find_get_block(bdev, block, size);
1351
1352 might_sleep();
1353 if (bh == NULL)
1354 bh = __getblk_slow(bdev, block, size, gfp);
1355 return bh;
1356}
1357EXPORT_SYMBOL(__getblk_gfp);
1358
1359
1360
1361
1362void __breadahead(struct block_device *bdev, sector_t block, unsigned size)
1363{
1364 struct buffer_head *bh = __getblk(bdev, block, size);
1365 if (likely(bh)) {
1366 ll_rw_block(REQ_OP_READ, REQ_RAHEAD, 1, &bh);
1367 brelse(bh);
1368 }
1369}
1370EXPORT_SYMBOL(__breadahead);
1371
1372void __breadahead_gfp(struct block_device *bdev, sector_t block, unsigned size,
1373 gfp_t gfp)
1374{
1375 struct buffer_head *bh = __getblk_gfp(bdev, block, size, gfp);
1376 if (likely(bh)) {
1377 ll_rw_block(REQ_OP_READ, REQ_RAHEAD, 1, &bh);
1378 brelse(bh);
1379 }
1380}
1381EXPORT_SYMBOL(__breadahead_gfp);
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395struct buffer_head *
1396__bread_gfp(struct block_device *bdev, sector_t block,
1397 unsigned size, gfp_t gfp)
1398{
1399 struct buffer_head *bh = __getblk_gfp(bdev, block, size, gfp);
1400
1401 if (likely(bh) && !buffer_uptodate(bh))
1402 bh = __bread_slow(bh);
1403 return bh;
1404}
1405EXPORT_SYMBOL(__bread_gfp);
1406
1407
1408
1409
1410
1411
1412static void invalidate_bh_lru(void *arg)
1413{
1414 struct bh_lru *b = &get_cpu_var(bh_lrus);
1415 int i;
1416
1417 for (i = 0; i < BH_LRU_SIZE; i++) {
1418 brelse(b->bhs[i]);
1419 b->bhs[i] = NULL;
1420 }
1421 put_cpu_var(bh_lrus);
1422}
1423
1424static bool has_bh_in_lru(int cpu, void *dummy)
1425{
1426 struct bh_lru *b = per_cpu_ptr(&bh_lrus, cpu);
1427 int i;
1428
1429 for (i = 0; i < BH_LRU_SIZE; i++) {
1430 if (b->bhs[i])
1431 return true;
1432 }
1433
1434 return false;
1435}
1436
1437void invalidate_bh_lrus(void)
1438{
1439 on_each_cpu_cond(has_bh_in_lru, invalidate_bh_lru, NULL, 1);
1440}
1441EXPORT_SYMBOL_GPL(invalidate_bh_lrus);
1442
1443void set_bh_page(struct buffer_head *bh,
1444 struct page *page, unsigned long offset)
1445{
1446 bh->b_page = page;
1447 BUG_ON(offset >= PAGE_SIZE);
1448 if (PageHighMem(page))
1449
1450
1451
1452 bh->b_data = (char *)(0 + offset);
1453 else
1454 bh->b_data = page_address(page) + offset;
1455}
1456EXPORT_SYMBOL(set_bh_page);
1457
1458
1459
1460
1461
1462
1463#define BUFFER_FLAGS_DISCARD \
1464 (1 << BH_Mapped | 1 << BH_New | 1 << BH_Req | \
1465 1 << BH_Delay | 1 << BH_Unwritten)
1466
1467static void discard_buffer(struct buffer_head * bh)
1468{
1469 unsigned long b_state, b_state_old;
1470
1471 lock_buffer(bh);
1472 clear_buffer_dirty(bh);
1473 bh->b_bdev = NULL;
1474 b_state = bh->b_state;
1475 for (;;) {
1476 b_state_old = cmpxchg(&bh->b_state, b_state,
1477 (b_state & ~BUFFER_FLAGS_DISCARD));
1478 if (b_state_old == b_state)
1479 break;
1480 b_state = b_state_old;
1481 }
1482 unlock_buffer(bh);
1483}
1484
1485
1486
1487
1488
1489
1490
1491
1492
1493
1494
1495
1496
1497
1498
1499
1500
1501void block_invalidatepage(struct page *page, unsigned int offset,
1502 unsigned int length)
1503{
1504 struct buffer_head *head, *bh, *next;
1505 unsigned int curr_off = 0;
1506 unsigned int stop = length + offset;
1507
1508 BUG_ON(!PageLocked(page));
1509 if (!page_has_buffers(page))
1510 goto out;
1511
1512
1513
1514
1515 BUG_ON(stop > PAGE_SIZE || stop < length);
1516
1517 head = page_buffers(page);
1518 bh = head;
1519 do {
1520 unsigned int next_off = curr_off + bh->b_size;
1521 next = bh->b_this_page;
1522
1523
1524
1525
1526 if (next_off > stop)
1527 goto out;
1528
1529
1530
1531
1532 if (offset <= curr_off)
1533 discard_buffer(bh);
1534 curr_off = next_off;
1535 bh = next;
1536 } while (bh != head);
1537
1538
1539
1540
1541
1542
1543 if (length == PAGE_SIZE)
1544 try_to_release_page(page, 0);
1545out:
1546 return;
1547}
1548EXPORT_SYMBOL(block_invalidatepage);
1549
1550
1551
1552
1553
1554
1555
1556void create_empty_buffers(struct page *page,
1557 unsigned long blocksize, unsigned long b_state)
1558{
1559 struct buffer_head *bh, *head, *tail;
1560
1561 head = alloc_page_buffers(page, blocksize, true);
1562 bh = head;
1563 do {
1564 bh->b_state |= b_state;
1565 tail = bh;
1566 bh = bh->b_this_page;
1567 } while (bh);
1568 tail->b_this_page = head;
1569
1570 spin_lock(&page->mapping->private_lock);
1571 if (PageUptodate(page) || PageDirty(page)) {
1572 bh = head;
1573 do {
1574 if (PageDirty(page))
1575 set_buffer_dirty(bh);
1576 if (PageUptodate(page))
1577 set_buffer_uptodate(bh);
1578 bh = bh->b_this_page;
1579 } while (bh != head);
1580 }
1581 attach_page_private(page, head);
1582 spin_unlock(&page->mapping->private_lock);
1583}
1584EXPORT_SYMBOL(create_empty_buffers);
1585
1586
1587
1588
1589
1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
1600
1601
1602
1603
1604
1605
1606void clean_bdev_aliases(struct block_device *bdev, sector_t block, sector_t len)
1607{
1608 struct inode *bd_inode = bdev->bd_inode;
1609 struct address_space *bd_mapping = bd_inode->i_mapping;
1610 struct pagevec pvec;
1611 pgoff_t index = block >> (PAGE_SHIFT - bd_inode->i_blkbits);
1612 pgoff_t end;
1613 int i, count;
1614 struct buffer_head *bh;
1615 struct buffer_head *head;
1616
1617 end = (block + len - 1) >> (PAGE_SHIFT - bd_inode->i_blkbits);
1618 pagevec_init(&pvec);
1619 while (pagevec_lookup_range(&pvec, bd_mapping, &index, end)) {
1620 count = pagevec_count(&pvec);
1621 for (i = 0; i < count; i++) {
1622 struct page *page = pvec.pages[i];
1623
1624 if (!page_has_buffers(page))
1625 continue;
1626
1627
1628
1629
1630
1631 lock_page(page);
1632
1633 if (!page_has_buffers(page))
1634 goto unlock_page;
1635 head = page_buffers(page);
1636 bh = head;
1637 do {
1638 if (!buffer_mapped(bh) || (bh->b_blocknr < block))
1639 goto next;
1640 if (bh->b_blocknr >= block + len)
1641 break;
1642 clear_buffer_dirty(bh);
1643 wait_on_buffer(bh);
1644 clear_buffer_req(bh);
1645next:
1646 bh = bh->b_this_page;
1647 } while (bh != head);
1648unlock_page:
1649 unlock_page(page);
1650 }
1651 pagevec_release(&pvec);
1652 cond_resched();
1653
1654 if (index > end || !index)
1655 break;
1656 }
1657}
1658EXPORT_SYMBOL(clean_bdev_aliases);
1659
1660
1661
1662
1663
1664
1665
1666
1667
/* Return ilog2() of @blocksize. */
static inline int block_size_bits(unsigned int blocksize)
{
	int bits = ilog2(blocksize);

	return bits;
}
1672
1673static struct buffer_head *create_page_buffers(struct page *page, struct inode *inode, unsigned int b_state)
1674{
1675 BUG_ON(!PageLocked(page));
1676
1677 if (!page_has_buffers(page))
1678 create_empty_buffers(page, 1 << READ_ONCE(inode->i_blkbits),
1679 b_state);
1680 return page_buffers(page);
1681}
1682
1683
1684
1685
1686
1687
1688
1689
1690
1691
1692
1693
1694
1695
1696
1697
1698
1699
1700
1701
1702
1703
1704
1705
1706
1707
1708
1709
1710
1711
/*
 * Write out the dirty buffers of a locked page.
 *
 * @inode:     inode the page belongs to; its i_size bounds the blocks that
 *             are considered for writing
 * @page:      locked page (create_page_buffers() BUG()s otherwise); it is
 *             unlocked on every path through this function
 * @get_block: filesystem callback used to map dirty buffers that are
 *             unmapped or delayed
 * @wbc:       writeback control; supplies the write flags and decides
 *             whether buffer locks are waited for or merely tried
 * @handler:   end-io handler installed on each buffer that is submitted
 *
 * Returns 0 on success, or the error returned by @get_block.  On the error
 * path the buffers that were already mappable are still written out, and
 * the error is recorded on the page and its mapping.
 */
int __block_write_full_page(struct inode *inode, struct page *page,
			get_block_t *get_block, struct writeback_control *wbc,
			bh_end_io_t *handler)
{
	int err;
	sector_t block;
	sector_t last_block;
	struct buffer_head *bh, *head;
	unsigned int blocksize, bbits;
	int nr_underway = 0;
	int write_flags = wbc_to_write_flags(wbc);

	/*
	 * If the page had no buffers, the newly created ones start out
	 * dirty and uptodate.
	 */
	head = create_page_buffers(page, inode,
					(1 << BH_Dirty)|(1 << BH_Uptodate));

	bh = head;
	blocksize = bh->b_size;
	bbits = block_size_bits(blocksize);

	/* First block covered by this page, and the last block inside i_size */
	block = (sector_t)page->index << (PAGE_SHIFT - bbits);
	last_block = (i_size_read(inode) - 1) >> bbits;

	/*
	 * Pass 1: make sure every dirty buffer inside i_size has a disk
	 * mapping, asking the filesystem for one where needed.
	 */
	do {
		if (block > last_block) {
			/*
			 * The buffer lies wholly beyond the last block of
			 * the file: do not map it, just mark it clean and
			 * uptodate so it is not written.
			 */
			clear_buffer_dirty(bh);
			set_buffer_uptodate(bh);
		} else if ((!buffer_mapped(bh) || buffer_delay(bh)) &&
			   buffer_dirty(bh)) {
			WARN_ON(bh->b_size != blocksize);
			err = get_block(inode, block, bh, 1);
			if (err)
				goto recover;
			clear_buffer_delay(bh);
			if (buffer_new(bh)) {
				/* Freshly allocated block: drop the new flag and any stale bdev alias */
				clear_buffer_new(bh);
				clean_bdev_bh_alias(bh);
			}
		}
		bh = bh->b_this_page;
		block++;
	} while (bh != head);

	/*
	 * Pass 2: lock each mapped buffer and tag the dirty ones for async
	 * write.  Note that "continue" in a do/while jumps to the loop
	 * condition, which is what advances bh.
	 */
	do {
		if (!buffer_mapped(bh))
			continue;
		/*
		 * For anything other than WB_SYNC_NONE we wait for the
		 * buffer lock.  For WB_SYNC_NONE we only try the lock; if
		 * that fails the page is redirtied and the buffer skipped.
		 */
		if (wbc->sync_mode != WB_SYNC_NONE) {
			lock_buffer(bh);
		} else if (!trylock_buffer(bh)) {
			redirty_page_for_writepage(wbc, page);
			continue;
		}
		if (test_clear_buffer_dirty(bh)) {
			mark_buffer_async_write_endio(bh, handler);
		} else {
			unlock_buffer(bh);
		}
	} while ((bh = bh->b_this_page) != head);

	/*
	 * The page must not already be under writeback.  Writeback is set
	 * before any buffer is submitted.
	 */
	BUG_ON(PageWriteback(page));
	set_page_writeback(page);

	/*
	 * Pass 3: submit the tagged buffers.  The next pointer is fetched
	 * before submitting; NOTE(review): presumably because bh may no
	 * longer be safe to dereference once its I/O is in flight -- confirm.
	 */
	do {
		struct buffer_head *next = bh->b_this_page;
		if (buffer_async_write(bh)) {
			submit_bh_wbc(REQ_OP_WRITE, write_flags, bh,
					inode->i_write_hint, wbc);
			nr_underway++;
		}
		bh = next;
	} while (bh != head);
	unlock_page(page);

	err = 0;
done:
	if (nr_underway == 0) {
		/*
		 * Nothing was submitted, so writeback is ended here.
		 * NOTE(review): presumably because no I/O completion will
		 * run to end it -- confirm against the end-io handlers.
		 */
		end_page_writeback(page);

	}
	return err;

recover:
	/*
	 * get_block() failed part-way through the page.  Buffers that are
	 * already mapped, dirty and not delayed are still written; every
	 * other buffer has its dirty bit cleared.
	 */
	bh = head;
	/* Recovery: lock and submit the mapped buffers */
	do {
		if (buffer_mapped(bh) && buffer_dirty(bh) &&
		    !buffer_delay(bh)) {
			lock_buffer(bh);
			mark_buffer_async_write_endio(bh, handler);
		} else {
			/*
			 * This buffer cannot be written (unmapped, clean or
			 * delayed): make sure it is not left dirty.
			 */
			clear_buffer_dirty(bh);
		}
	} while ((bh = bh->b_this_page) != head);
	/* Record the failure on both the page and the mapping */
	SetPageError(page);
	BUG_ON(PageWriteback(page));
	mapping_set_error(page->mapping, err);
	set_page_writeback(page);
	do {
		struct buffer_head *next = bh->b_this_page;
		if (buffer_async_write(bh)) {
			clear_buffer_dirty(bh);
			submit_bh_wbc(REQ_OP_WRITE, write_flags, bh,
					inode->i_write_hint, wbc);
			nr_underway++;
		}
		bh = next;
	} while (bh != head);
	unlock_page(page);
	goto done;
}
EXPORT_SYMBOL(__block_write_full_page);
1875
1876
1877
1878
1879
1880
1881void page_zero_new_buffers(struct page *page, unsigned from, unsigned to)
1882{
1883 unsigned int block_start, block_end;
1884 struct buffer_head *head, *bh;
1885
1886 BUG_ON(!PageLocked(page));
1887 if (!page_has_buffers(page))
1888 return;
1889
1890 bh = head = page_buffers(page);
1891 block_start = 0;
1892 do {
1893 block_end = block_start + bh->b_size;
1894
1895 if (buffer_new(bh)) {
1896 if (block_end > from && block_start < to) {
1897 if (!PageUptodate(page)) {
1898 unsigned start, size;
1899
1900 start = max(from, block_start);
1901 size = min(to, block_end) - start;
1902
1903 zero_user(page, start, size);
1904 set_buffer_uptodate(bh);
1905 }
1906
1907 clear_buffer_new(bh);
1908 mark_buffer_dirty(bh);
1909 }
1910 }
1911
1912 block_start = block_end;
1913 bh = bh->b_this_page;
1914 } while (bh != head);
1915}
1916EXPORT_SYMBOL(page_zero_new_buffers);
1917
/*
 * Translate an iomap extent into buffer_head state for one block.
 *
 * @inode: inode the block belongs to (supplies i_blkbits and i_size)
 * @block: file-relative block number described by @bh
 * @bh:    buffer head to fill in
 * @iomap: extent that must cover @block
 *
 * Sets bh->b_bdev unconditionally, then sets the new/uptodate/mapped/
 * delay/unwritten flags and b_blocknr according to iomap->type.  An iomap
 * type not listed in the switch leaves the flags untouched.
 */
static void
iomap_to_bh(struct inode *inode, sector_t block, struct buffer_head *bh,
		struct iomap *iomap)
{
	/* Byte offset of @block within the file */
	loff_t offset = block << inode->i_blkbits;

	bh->b_bdev = iomap->bdev;

	/*
	 * The block must start before the end of the extent.  Only the
	 * upper bound is checked here.
	 */
	BUG_ON(offset >= iomap->offset + iomap->length);

	switch (iomap->type) {
	case IOMAP_HOLE:
		/*
		 * A hole is never mapped.  The buffer is flagged new when it
		 * is not uptodate or starts at/after i_size.
		 */
		if (!buffer_uptodate(bh) ||
		    (offset >= i_size_read(inode)))
			set_buffer_new(bh);
		break;
	case IOMAP_DELALLOC:
		/* Same "new" rule as a hole, but mapped, uptodate and delayed */
		if (!buffer_uptodate(bh) ||
		    (offset >= i_size_read(inode)))
			set_buffer_new(bh);
		set_buffer_uptodate(bh);
		set_buffer_mapped(bh);
		set_buffer_delay(bh);
		break;
	case IOMAP_UNWRITTEN:
		/*
		 * Unwritten extents are always flagged new and unwritten,
		 * then share the block-number setup of a mapped extent.
		 */
		set_buffer_new(bh);
		set_buffer_unwritten(bh);
		fallthrough;
	case IOMAP_MAPPED:
		if ((iomap->flags & IOMAP_F_NEW) ||
		    offset >= i_size_read(inode))
			set_buffer_new(bh);
		/* Device address of the block, converted from bytes to blocks */
		bh->b_blocknr = (iomap->addr + offset - iomap->offset) >>
				inode->i_blkbits;
		set_buffer_mapped(bh);
		break;
	}
}
1972
/*
 * Prepare the buffers of a locked page for a write of @len bytes at @pos.
 *
 * @page:      locked page that will receive the data
 * @pos:       file position of the write; only its offset within the page
 *             is used here
 * @len:       number of bytes; @pos's page offset plus @len must not
 *             exceed PAGE_SIZE (enforced with BUG_ON)
 * @get_block: mapping callback, or NULL to map from @iomap instead
 * @iomap:     extent used for mapping when @get_block is NULL
 *
 * Every buffer overlapping the write range is mapped if necessary.  New
 * buffers have the parts outside the write range zeroed, and existing
 * buffers that are only partially overwritten and not uptodate are read
 * from disk.  Returns 0, the error from @get_block, or -EIO if a read
 * failed.  On error, new buffers inside the range are zeroed via
 * page_zero_new_buffers().
 */
int __block_write_begin_int(struct page *page, loff_t pos, unsigned len,
		get_block_t *get_block, struct iomap *iomap)
{
	unsigned from = pos & (PAGE_SIZE - 1);
	unsigned to = from + len;
	struct inode *inode = page->mapping->host;
	unsigned block_start, block_end;
	sector_t block;
	int err = 0;
	unsigned blocksize, bbits;
	/*
	 * Reads are only queued for buffers partially covered by the
	 * contiguous range [from, to), i.e. at most the first and the last
	 * overlapping buffer -- hence two slots.
	 */
	struct buffer_head *bh, *head, *wait[2], **wait_bh=wait;

	BUG_ON(!PageLocked(page));
	BUG_ON(from > PAGE_SIZE);
	BUG_ON(to > PAGE_SIZE);
	BUG_ON(from > to);

	head = create_page_buffers(page, inode, 0);
	blocksize = head->b_size;
	bbits = block_size_bits(blocksize);

	block = (sector_t)page->index << (PAGE_SHIFT - bbits);

	/*
	 * Walk the circular buffer list exactly once: the "!block_start"
	 * term lets the first iteration run even though bh == head.
	 */
	for(bh = head, block_start = 0; bh != head || !block_start;
	    block++, block_start=block_end, bh = bh->b_this_page) {
		block_end = block_start + blocksize;
		if (block_end <= from || block_start >= to) {
			/* Buffer is outside the write: just propagate page uptodate-ness */
			if (PageUptodate(page)) {
				if (!buffer_uptodate(bh))
					set_buffer_uptodate(bh);
			}
			continue;
		}
		/* Drop a stale new flag before (re)mapping */
		if (buffer_new(bh))
			clear_buffer_new(bh);
		if (!buffer_mapped(bh)) {
			WARN_ON(bh->b_size != blocksize);
			if (get_block) {
				err = get_block(inode, block, bh, 1);
				if (err)
					break;
			} else {
				iomap_to_bh(inode, block, bh, iomap);
			}

			if (buffer_new(bh)) {
				clean_bdev_bh_alias(bh);
				if (PageUptodate(page)) {
					/* Page data is already valid: the buffer is simply dirty */
					clear_buffer_new(bh);
					set_buffer_uptodate(bh);
					mark_buffer_dirty(bh);
					continue;
				}
				/*
				 * Zero the head and tail of the new block
				 * that the write will not cover.
				 */
				if (block_end > to || block_start < from)
					zero_user_segments(page,
						to, block_end,
						block_start, from);
				continue;
			}
		}
		if (PageUptodate(page)) {
			if (!buffer_uptodate(bh))
				set_buffer_uptodate(bh);
			continue; 
		}
		/*
		 * An existing, not-uptodate block that is only partially
		 * overwritten must be read first so the untouched part is
		 * valid.  Delayed and unwritten buffers are not read.
		 */
		if (!buffer_uptodate(bh) && !buffer_delay(bh) &&
		    !buffer_unwritten(bh) &&
		     (block_start < from || block_end > to)) {
			ll_rw_block(REQ_OP_READ, 0, 1, &bh);
			*wait_bh++=bh;
		}
	}
	/*
	 * Wait for any queued reads, even if the loop above stopped early
	 * on a get_block() error.
	 */
	while(wait_bh > wait) {
		wait_on_buffer(*--wait_bh);
		if (!buffer_uptodate(*wait_bh))
			err = -EIO;
	}
	if (unlikely(err))
		page_zero_new_buffers(page, from, to);
	return err;
}
2057
/*
 * get_block-based front end to __block_write_begin_int(): forwards all
 * arguments and passes NULL for the iomap, so @get_block is what maps the
 * buffers.  Returns whatever __block_write_begin_int() returns.
 */
int __block_write_begin(struct page *page, loff_t pos, unsigned len,
		get_block_t *get_block)
{
	return __block_write_begin_int(page, pos, len, get_block, NULL);
}
EXPORT_SYMBOL(__block_write_begin);
2064
2065static int __block_commit_write(struct inode *inode, struct page *page,
2066 unsigned from, unsigned to)
2067{
2068 unsigned block_start, block_end;
2069 int partial = 0;
2070 unsigned blocksize;
2071 struct buffer_head *bh, *head;
2072
2073 bh = head = page_buffers(page);
2074 blocksize = bh->b_size;
2075
2076 block_start = 0;
2077 do {
2078 block_end = block_start + blocksize;
2079 if (block_end <= from || block_start >= to) {
2080 if (!buffer_uptodate(bh))
2081 partial = 1;
2082 } else {
2083 set_buffer_uptodate(bh);
2084 mark_buffer_dirty(bh);
2085 }
2086 clear_buffer_new(bh);
2087
2088 block_start = block_end;
2089 bh = bh->b_this_page;
2090 } while (bh != head);
2091
2092
2093
2094
2095
2096
2097
2098 if (!partial)
2099 SetPageUptodate(page);
2100 return 0;
2101}
2102
2103
2104
2105
2106
2107
2108
2109int block_write_begin(struct address_space *mapping, loff_t pos, unsigned len,
2110 unsigned flags, struct page **pagep, get_block_t *get_block)
2111{
2112 pgoff_t index = pos >> PAGE_SHIFT;
2113 struct page *page;
2114 int status;
2115
2116 page = grab_cache_page_write_begin(mapping, index, flags);
2117 if (!page)
2118 return -ENOMEM;
2119
2120 status = __block_write_begin(page, pos, len, get_block);
2121 if (unlikely(status)) {
2122 unlock_page(page);
2123 put_page(page);
2124 page = NULL;
2125 }
2126
2127 *pagep = page;
2128 return status;
2129}
2130EXPORT_SYMBOL(block_write_begin);
2131
2132int block_write_end(struct file *file, struct address_space *mapping,
2133 loff_t pos, unsigned len, unsigned copied,
2134 struct page *page, void *fsdata)
2135{
2136 struct inode *inode = mapping->host;
2137 unsigned start;
2138
2139 start = pos & (PAGE_SIZE - 1);
2140
2141 if (unlikely(copied < len)) {
2142
2143
2144
2145
2146
2147
2148
2149
2150
2151
2152
2153
2154 if (!PageUptodate(page))
2155 copied = 0;
2156
2157 page_zero_new_buffers(page, start+copied, start+len);
2158 }
2159 flush_dcache_page(page);
2160
2161
2162 __block_commit_write(inode, page, start, start+copied);
2163
2164 return copied;
2165}
2166EXPORT_SYMBOL(block_write_end);
2167
2168int generic_write_end(struct file *file, struct address_space *mapping,
2169 loff_t pos, unsigned len, unsigned copied,
2170 struct page *page, void *fsdata)
2171{
2172 struct inode *inode = mapping->host;
2173 loff_t old_size = inode->i_size;
2174 bool i_size_changed = false;
2175
2176 copied = block_write_end(file, mapping, pos, len, copied, page, fsdata);
2177
2178
2179
2180
2181
2182
2183
2184
2185 if (pos + copied > inode->i_size) {
2186 i_size_write(inode, pos + copied);
2187 i_size_changed = true;
2188 }
2189
2190 unlock_page(page);
2191 put_page(page);
2192
2193 if (old_size < pos)
2194 pagecache_isize_extended(inode, old_size, pos);
2195
2196
2197
2198
2199
2200
2201 if (i_size_changed)
2202 mark_inode_dirty(inode);
2203 return copied;
2204}
2205EXPORT_SYMBOL(generic_write_end);
2206
2207
2208
2209
2210
2211
2212
2213
2214int block_is_partially_uptodate(struct page *page, unsigned long from,
2215 unsigned long count)
2216{
2217 unsigned block_start, block_end, blocksize;
2218 unsigned to;
2219 struct buffer_head *bh, *head;
2220 int ret = 1;
2221
2222 if (!page_has_buffers(page))
2223 return 0;
2224
2225 head = page_buffers(page);
2226 blocksize = head->b_size;
2227 to = min_t(unsigned, PAGE_SIZE - from, count);
2228 to = from + to;
2229 if (from < blocksize && to > PAGE_SIZE - blocksize)
2230 return 0;
2231
2232 bh = head;
2233 block_start = 0;
2234 do {
2235 block_end = block_start + blocksize;
2236 if (block_end > from && block_start < to) {
2237 if (!buffer_uptodate(bh)) {
2238 ret = 0;
2239 break;
2240 }
2241 if (block_end >= to)
2242 break;
2243 }
2244 block_start = block_end;
2245 bh = bh->b_this_page;
2246 } while (bh != head);
2247
2248 return ret;
2249}
2250EXPORT_SYMBOL(block_is_partially_uptodate);
2251
2252
2253
2254
2255
2256
2257
2258
/*
 * Read a whole page through its buffer heads.
 *
 * @page:      page to fill; create_page_buffers() requires it to be locked
 * @get_block: callback used to map buffers that are not yet mapped
 *
 * Buffers that are already uptodate are skipped.  Buffers that remain
 * unmapped after @get_block (holes, or blocks at/after the end of the
 * file) are zero-filled.  All remaining buffers are collected, locked and
 * submitted for asynchronous read.  Always returns 0; a @get_block
 * failure is recorded with SetPageError() instead.
 */
int block_read_full_page(struct page *page, get_block_t *get_block)
{
	struct inode *inode = page->mapping->host;
	sector_t iblock, lblock;
	struct buffer_head *bh, *head, *arr[MAX_BUF_PER_PAGE];
	unsigned int blocksize, bbits;
	int nr, i;
	int fully_mapped = 1;

	head = create_page_buffers(page, inode, 0);
	blocksize = head->b_size;
	bbits = block_size_bits(blocksize);

	/* First block of the page, and the first block past i_size (rounded up) */
	iblock = (sector_t)page->index << (PAGE_SHIFT - bbits);
	lblock = (i_size_read(inode)+blocksize-1) >> bbits;
	bh = head;
	nr = 0;
	i = 0;

	/*
	 * Stage one: map buffers and collect those needing I/O.  "continue"
	 * jumps to the do/while condition, whose comma expression advances
	 * i, iblock and bh.
	 */
	do {
		if (buffer_uptodate(bh))
			continue;

		if (!buffer_mapped(bh)) {
			int err = 0;

			fully_mapped = 0;
			if (iblock < lblock) {
				WARN_ON(bh->b_size != blocksize);
				err = get_block(inode, iblock, bh, 0);
				if (err)
					SetPageError(page);
			}
			if (!buffer_mapped(bh)) {
				/*
				 * Still unmapped: there is nothing to read,
				 * so present zeroes.  The buffer is only
				 * marked uptodate when mapping did not fail.
				 */
				zero_user(page, i * blocksize, blocksize);
				if (!err)
					set_buffer_uptodate(bh);
				continue;
			}
			/*
			 * get_block() itself may have marked the buffer
			 * uptodate, in which case no read is needed.
			 */
			if (buffer_uptodate(bh))
				continue;
		}
		arr[nr++] = bh;
	} while (i++, iblock++, (bh = bh->b_this_page) != head);

	if (fully_mapped)
		SetPageMappedToDisk(page);

	if (!nr) {
		/*
		 * No buffer needs I/O.  The page becomes uptodate unless a
		 * mapping error was recorded, and is unlocked here.
		 */
		if (!PageError(page))
			SetPageUptodate(page);
		unlock_page(page);
		return 0;
	}

	/* Stage two: lock all collected buffers and mark them async-read */
	for (i = 0; i < nr; i++) {
		bh = arr[i];
		lock_buffer(bh);
		mark_buffer_async_read(bh);
	}

	/*
	 * Stage three: start the I/O.  A buffer that became uptodate in the
	 * meantime is completed directly through the read end-io handler
	 * instead of being submitted.
	 */
	for (i = 0; i < nr; i++) {
		bh = arr[i];
		if (buffer_uptodate(bh))
			end_buffer_async_read(bh, 1);
		else
			submit_bh(REQ_OP_READ, 0, bh);
	}
	return 0;
}
EXPORT_SYMBOL(block_read_full_page);
2344
2345
2346
2347
2348
2349int generic_cont_expand_simple(struct inode *inode, loff_t size)
2350{
2351 struct address_space *mapping = inode->i_mapping;
2352 struct page *page;
2353 void *fsdata;
2354 int err;
2355
2356 err = inode_newsize_ok(inode, size);
2357 if (err)
2358 goto out;
2359
2360 err = pagecache_write_begin(NULL, mapping, size, 0,
2361 AOP_FLAG_CONT_EXPAND, &page, &fsdata);
2362 if (err)
2363 goto out;
2364
2365 err = pagecache_write_end(NULL, mapping, size, 0, 0, page, fsdata);
2366 BUG_ON(err > 0);
2367
2368out:
2369 return err;
2370}
2371EXPORT_SYMBOL(generic_cont_expand_simple);
2372
/*
 * Zero-fill the file from the position stored in *@bytes up to @pos by
 * writing zeroes through the page cache, one page at a time.
 *
 * @file:    passed through to pagecache_write_begin()/pagecache_write_end()
 * @mapping: address space being extended
 * @pos:     position the caller is about to write at
 * @bytes:   in/out position up to which the file has already been filled.
 *           It is rounded up to the next block boundary here when it is
 *           not block aligned.  NOTE(review): the loop re-reads *@bytes
 *           each iteration, so it presumably relies on the write_end path
 *           advancing it -- confirm against the filesystem.
 *
 * Returns 0 on success, a negative error from the pagecache write
 * helpers, or -EINTR if a fatal signal is pending.
 */
static int cont_expand_zero(struct file *file, struct address_space *mapping,
			    loff_t pos, loff_t *bytes)
{
	struct inode *inode = mapping->host;
	unsigned int blocksize = i_blocksize(inode);
	struct page *page;
	void *fsdata;
	pgoff_t index, curidx;
	loff_t curpos;
	unsigned zerofrom, offset, len;
	int err = 0;

	/* Page index and in-page offset of the target position */
	index = pos >> PAGE_SHIFT;
	offset = pos & ~PAGE_MASK;

	/*
	 * Whole-page phase: while the filled position lies on an earlier
	 * page than @pos, zero from there to the end of that page.  The
	 * condition also latches curpos/curidx for the code after the loop.
	 */
	while (index > (curidx = (curpos = *bytes)>>PAGE_SHIFT)) {
		zerofrom = curpos & ~PAGE_MASK;
		if (zerofrom & (blocksize-1)) {
			/* Round *bytes up to the next block boundary */
			*bytes |= (blocksize-1);
			(*bytes)++;
		}
		len = PAGE_SIZE - zerofrom;

		err = pagecache_write_begin(file, mapping, curpos, len, 0,
					    &page, &fsdata);
		if (err)
			goto out;
		zero_user(page, zerofrom, len);
		err = pagecache_write_end(file, mapping, curpos, len, len,
						page, fsdata);
		if (err < 0)
			goto out;
		/* A short zeroing write is not expected */
		BUG_ON(err != len);
		err = 0;

		balance_dirty_pages_ratelimited(mapping);

		/* Allow a killed task to bail out of a long expansion */
		if (fatal_signal_pending(current)) {
			err = -EINTR;
			goto out;
		}
	}

	/* Final page: the filled position is on the same page as @pos */
	if (index == curidx) {
		zerofrom = curpos & ~PAGE_MASK;
		/* Nothing to zero when @pos is not beyond the filled position */
		if (offset <= zerofrom) {
			goto out;
		}
		if (zerofrom & (blocksize-1)) {
			*bytes |= (blocksize-1);
			(*bytes)++;
		}
		len = offset - zerofrom;

		err = pagecache_write_begin(file, mapping, curpos, len, 0,
					    &page, &fsdata);
		if (err)
			goto out;
		zero_user(page, zerofrom, len);
		err = pagecache_write_end(file, mapping, curpos, len, len,
						page, fsdata);
		if (err < 0)
			goto out;
		BUG_ON(err != len);
		err = 0;
	}
out:
	return err;
}
2444
2445
2446
2447
2448
2449int cont_write_begin(struct file *file, struct address_space *mapping,
2450 loff_t pos, unsigned len, unsigned flags,
2451 struct page **pagep, void **fsdata,
2452 get_block_t *get_block, loff_t *bytes)
2453{
2454 struct inode *inode = mapping->host;
2455 unsigned int blocksize = i_blocksize(inode);
2456 unsigned int zerofrom;
2457 int err;
2458
2459 err = cont_expand_zero(file, mapping, pos, bytes);
2460 if (err)
2461 return err;
2462
2463 zerofrom = *bytes & ~PAGE_MASK;
2464 if (pos+len > *bytes && zerofrom & (blocksize-1)) {
2465 *bytes |= (blocksize-1);
2466 (*bytes)++;
2467 }
2468
2469 return block_write_begin(mapping, pos, len, flags, pagep, get_block);
2470}
2471EXPORT_SYMBOL(cont_write_begin);
2472
2473int block_commit_write(struct page *page, unsigned from, unsigned to)
2474{
2475 struct inode *inode = page->mapping->host;
2476 __block_commit_write(inode,page,from,to);
2477 return 0;
2478}
2479EXPORT_SYMBOL(block_commit_write);
2480
2481
2482
2483
2484
2485
2486
2487
2488
2489
2490
2491
2492
2493
2494
2495
2496
2497
2498
2499int block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
2500 get_block_t get_block)
2501{
2502 struct page *page = vmf->page;
2503 struct inode *inode = file_inode(vma->vm_file);
2504 unsigned long end;
2505 loff_t size;
2506 int ret;
2507
2508 lock_page(page);
2509 size = i_size_read(inode);
2510 if ((page->mapping != inode->i_mapping) ||
2511 (page_offset(page) > size)) {
2512
2513 ret = -EFAULT;
2514 goto out_unlock;
2515 }
2516
2517
2518 if (((page->index + 1) << PAGE_SHIFT) > size)
2519 end = size & ~PAGE_MASK;
2520 else
2521 end = PAGE_SIZE;
2522
2523 ret = __block_write_begin(page, 0, end, get_block);
2524 if (!ret)
2525 ret = block_commit_write(page, 0, end);
2526
2527 if (unlikely(ret < 0))
2528 goto out_unlock;
2529 set_page_dirty(page);
2530 wait_for_stable_page(page);
2531 return 0;
2532out_unlock:
2533 unlock_page(page);
2534 return ret;
2535}
2536EXPORT_SYMBOL(block_page_mkwrite);
2537
2538
2539
2540
2541
2542
/*
 * Read-completion handler installed by nobh_write_begin() on the buffers
 * it submits; it simply forwards to __end_buffer_read_notouch().
 */
static void end_buffer_read_nobh(struct buffer_head *bh, int uptodate)
{
	__end_buffer_read_notouch(bh, uptodate);
}
2547
2548
2549
2550
2551
2552
2553static void attach_nobh_buffers(struct page *page, struct buffer_head *head)
2554{
2555 struct buffer_head *bh;
2556
2557 BUG_ON(!PageLocked(page));
2558
2559 spin_lock(&page->mapping->private_lock);
2560 bh = head;
2561 do {
2562 if (PageDirty(page))
2563 set_buffer_dirty(bh);
2564 if (!bh->b_this_page)
2565 bh->b_this_page = head;
2566 bh = bh->b_this_page;
2567 } while (bh != head);
2568 attach_page_private(page, head);
2569 spin_unlock(&page->mapping->private_lock);
2570}
2571
2572
2573
2574
2575
2576
/*
 * write_begin variant that tries to avoid leaving buffer heads attached
 * to the page.
 *
 * @mapping:   address space being written
 * @pos, @len: byte range of the write
 * @flags:     passed to grab_cache_page_write_begin()
 * @pagep:     receives the locked page, or NULL on failure after the page
 *             was obtained
 * @fsdata:    receives the head of the temporary, unattached buffer list
 *             on the buffer-less success path, NULL otherwise
 * @get_block: filesystem block-mapping callback
 *
 * If the page already has buffers, the work is delegated to
 * __block_write_begin().  If the page is already flagged mapped-to-disk,
 * nothing needs doing.  Otherwise a temporary buffer list is allocated,
 * every block of the page is mapped, new/unmapped blocks are zeroed
 * around the write range and partially overwritten blocks are read in.
 *
 * Returns 0 on success or a negative errno (-ENOMEM, a @get_block error,
 * or -EIO).
 */
int nobh_write_begin(struct address_space *mapping,
			loff_t pos, unsigned len, unsigned flags,
			struct page **pagep, void **fsdata,
			get_block_t *get_block)
{
	struct inode *inode = mapping->host;
	const unsigned blkbits = inode->i_blkbits;
	const unsigned blocksize = 1 << blkbits;
	struct buffer_head *head, *bh;
	struct page *page;
	pgoff_t index;
	unsigned from, to;
	unsigned block_in_page;
	unsigned block_start, block_end;
	sector_t block_in_file;
	int nr_reads = 0;
	int ret = 0;
	int is_mapped_to_disk = 1;

	index = pos >> PAGE_SHIFT;
	from = pos & (PAGE_SIZE - 1);
	to = from + len;

	page = grab_cache_page_write_begin(mapping, index, flags);
	if (!page)
		return -ENOMEM;
	*pagep = page;
	*fsdata = NULL;

	/* Page already carries buffers: use the regular buffer-based path */
	if (page_has_buffers(page)) {
		ret = __block_write_begin(page, pos, len, get_block);
		if (unlikely(ret))
			goto out_release;
		return ret;
	}

	/* Every block is already known to be mapped: nothing to prepare */
	if (PageMappedToDisk(page))
		return 0;

	/*
	 * Allocate buffers that are NOT attached to the page yet.  The
	 * loops below walk them through b_this_page until NULL, i.e. they
	 * rely on the list still being linear rather than a ring.
	 */
	head = alloc_page_buffers(page, blocksize, false);
	if (!head) {
		ret = -ENOMEM;
		goto out_release;
	}

	block_in_file = (sector_t)page->index << (PAGE_SHIFT - blkbits);

	/*
	 * Map every block of the page, not just those in the write range,
	 * so that is_mapped_to_disk reflects the whole page.
	 */
	for (block_start = 0, block_in_page = 0, bh = head;
		  block_start < PAGE_SIZE;
		  block_in_page++, block_start += blocksize, bh = bh->b_this_page) {
		int create;

		block_end = block_start + blocksize;
		bh->b_state = 0;
		/* Only allocate blocks that start before the end of the write */
		create = 1;
		if (block_start >= to)
			create = 0;
		ret = get_block(inode, block_in_file + block_in_page,
					bh, create);
		if (ret)
			goto failed;
		if (!buffer_mapped(bh))
			is_mapped_to_disk = 0;
		if (buffer_new(bh))
			clean_bdev_bh_alias(bh);
		if (PageUptodate(page)) {
			set_buffer_uptodate(bh);
			continue;
		}
		if (buffer_new(bh) || !buffer_mapped(bh)) {
			/* No on-disk data: zero what the write will not cover */
			zero_user_segments(page, block_start, from,
							to, block_end);
			continue;
		}
		if (buffer_uptodate(bh))
			continue;	/* reiserfs does this */
		/* Partially overwritten existing block: read it in first */
		if (block_start < from || block_end > to) {
			lock_buffer(bh);
			bh->b_end_io = end_buffer_read_nobh;
			submit_bh(REQ_OP_READ, 0, bh);
			nr_reads++;
		}
	}

	if (nr_reads) {
		/*
		 * Wait on every buffer of the list.  Any buffer found not
		 * uptodate afterwards turns the result into -EIO.
		 */
		for (bh = head; bh; bh = bh->b_this_page) {
			wait_on_buffer(bh);
			if (!buffer_uptodate(bh))
				ret = -EIO;
		}
		if (ret)
			goto failed;
	}

	if (is_mapped_to_disk)
		SetPageMappedToDisk(page);

	/* Hand the unattached buffer list to nobh_write_end() */
	*fsdata = head;

	return 0;

failed:
	BUG_ON(!ret);
	/*
	 * On error the buffers are attached to the page (which also closes
	 * the list into a ring), so that page_zero_new_buffers() can find
	 * and zero the new buffers within the write range.
	 */
	attach_nobh_buffers(page, head);
	page_zero_new_buffers(page, from, to);

out_release:
	unlock_page(page);
	put_page(page);
	*pagep = NULL;

	return ret;
}
EXPORT_SYMBOL(nobh_write_begin);
2717
2718int nobh_write_end(struct file *file, struct address_space *mapping,
2719 loff_t pos, unsigned len, unsigned copied,
2720 struct page *page, void *fsdata)
2721{
2722 struct inode *inode = page->mapping->host;
2723 struct buffer_head *head = fsdata;
2724 struct buffer_head *bh;
2725 BUG_ON(fsdata != NULL && page_has_buffers(page));
2726
2727 if (unlikely(copied < len) && head)
2728 attach_nobh_buffers(page, head);
2729 if (page_has_buffers(page))
2730 return generic_write_end(file, mapping, pos, len,
2731 copied, page, fsdata);
2732
2733 SetPageUptodate(page);
2734 set_page_dirty(page);
2735 if (pos+copied > inode->i_size) {
2736 i_size_write(inode, pos+copied);
2737 mark_inode_dirty(inode);
2738 }
2739
2740 unlock_page(page);
2741 put_page(page);
2742
2743 while (head) {
2744 bh = head;
2745 head = head->b_this_page;
2746 free_buffer_head(bh);
2747 }
2748
2749 return copied;
2750}
2751EXPORT_SYMBOL(nobh_write_end);
2752
2753
2754
2755
2756
2757
2758int nobh_writepage(struct page *page, get_block_t *get_block,
2759 struct writeback_control *wbc)
2760{
2761 struct inode * const inode = page->mapping->host;
2762 loff_t i_size = i_size_read(inode);
2763 const pgoff_t end_index = i_size >> PAGE_SHIFT;
2764 unsigned offset;
2765 int ret;
2766
2767
2768 if (page->index < end_index)
2769 goto out;
2770
2771
2772 offset = i_size & (PAGE_SIZE-1);
2773 if (page->index >= end_index+1 || !offset) {
2774
2775
2776
2777
2778
2779#if 0
2780
2781 if (page->mapping->a_ops->invalidatepage)
2782 page->mapping->a_ops->invalidatepage(page, offset);
2783#endif
2784 unlock_page(page);
2785 return 0;
2786 }
2787
2788
2789
2790
2791
2792
2793
2794
2795 zero_user_segment(page, offset, PAGE_SIZE);
2796out:
2797 ret = mpage_writepage(page, get_block, wbc);
2798 if (ret == -EAGAIN)
2799 ret = __block_write_full_page(inode, page, get_block, wbc,
2800 end_buffer_async_write);
2801 return ret;
2802}
2803EXPORT_SYMBOL(nobh_writepage);
2804
/*
 * Zero the part of the block containing byte @from that lies at or after
 * @from, without attaching buffers to the page where possible.
 *
 * @mapping:   address space being truncated
 * @from:      new end position; nothing is done if it is block aligned
 * @get_block: filesystem block-mapping callback
 *
 * If the page has (or acquires) buffers, the work is handed over to
 * block_truncate_page().  Returns 0 on success or when there is nothing
 * to zero (aligned @from, or the block is a hole), otherwise a negative
 * errno.
 */
int nobh_truncate_page(struct address_space *mapping,
			loff_t from, get_block_t *get_block)
{
	pgoff_t index = from >> PAGE_SHIFT;
	unsigned offset = from & (PAGE_SIZE-1);
	unsigned blocksize;
	sector_t iblock;
	unsigned length, pos;
	struct inode *inode = mapping->host;
	struct page *page;
	struct buffer_head map_bh;
	int err;

	blocksize = i_blocksize(inode);
	length = offset & (blocksize - 1);

	/* Block boundary? Nothing to do */
	if (!length)
		return 0;

	/* Number of bytes from @from to the end of its block */
	length = blocksize - length;
	iblock = (sector_t)index << (PAGE_SHIFT - inode->i_blkbits);

	page = grab_cache_page(mapping, index);
	err = -ENOMEM;
	if (!page)
		goto out;

	if (page_has_buffers(page)) {
has_buffers:
		/* Also reached by goto when buffers appear after readpage */
		unlock_page(page);
		put_page(page);
		return block_truncate_page(mapping, from, get_block);
	}

	/* Find the block that contains "offset" */
	pos = blocksize;
	while (offset >= pos) {
		iblock++;
		pos += blocksize;
	}

	/* Probe the mapping with an on-stack buffer head; do not allocate */
	map_bh.b_size = blocksize;
	map_bh.b_state = 0;
	err = get_block(inode, iblock, &map_bh, 0);
	if (err)
		goto unlock;
	/* Unmapped? It's a hole - nothing to do (err is 0 here) */
	if (!buffer_mapped(&map_bh))
		goto unlock;

	/* Ok, it's mapped. Make sure it's up-to-date */
	if (!PageUptodate(page)) {
		err = mapping->a_ops->readpage(NULL, page);
		if (err) {
			/*
			 * Only the reference is dropped here, not the lock.
			 * NOTE(review): presumably readpage unlocks the
			 * page itself -- confirm.
			 */
			put_page(page);
			goto out;
		}
		lock_page(page);
		if (!PageUptodate(page)) {
			err = -EIO;
			goto unlock;
		}
		if (page_has_buffers(page))
			goto has_buffers;
	}
	zero_user(page, offset, length);
	set_page_dirty(page);
	err = 0;

unlock:
	unlock_page(page);
	put_page(page);
out:
	return err;
}
EXPORT_SYMBOL(nobh_truncate_page);
2882
/*
 * Zero the part of the block containing byte @from that lies at or after
 * @from, using the page's buffer heads.
 *
 * @mapping:   address space being truncated
 * @from:      new end position; nothing is done if it is block aligned
 * @get_block: filesystem block-mapping callback
 *
 * Returns 0 on success or when there is nothing to zero (aligned @from,
 * or the block is a hole), -ENOMEM if the page cannot be obtained, the
 * error from @get_block, or -EIO if the block could not be read.
 */
int block_truncate_page(struct address_space *mapping,
			loff_t from, get_block_t *get_block)
{
	pgoff_t index = from >> PAGE_SHIFT;
	unsigned offset = from & (PAGE_SIZE-1);
	unsigned blocksize;
	sector_t iblock;
	unsigned length, pos;
	struct inode *inode = mapping->host;
	struct page *page;
	struct buffer_head *bh;
	int err;

	blocksize = i_blocksize(inode);
	length = offset & (blocksize - 1);

	/* Block boundary? Nothing to do */
	if (!length)
		return 0;

	/* Number of bytes from @from to the end of its block */
	length = blocksize - length;
	iblock = (sector_t)index << (PAGE_SHIFT - inode->i_blkbits);
	
	page = grab_cache_page(mapping, index);
	err = -ENOMEM;
	if (!page)
		goto out;

	if (!page_has_buffers(page))
		create_empty_buffers(page, blocksize, 0);

	/* Find the buffer that contains "offset" */
	bh = page_buffers(page);
	pos = blocksize;
	while (offset >= pos) {
		bh = bh->b_this_page;
		iblock++;
		pos += blocksize;
	}

	err = 0;
	if (!buffer_mapped(bh)) {
		WARN_ON(bh->b_size != blocksize);
		err = get_block(inode, iblock, bh, 0);
		if (err)
			goto unlock;
		/* Unmapped? It's a hole - nothing to do (err is 0 here) */
		if (!buffer_mapped(bh))
			goto unlock;
	}

	/* Ok, it's mapped. Make sure it's up-to-date */
	if (PageUptodate(page))
		set_buffer_uptodate(bh);

	if (!buffer_uptodate(bh) && !buffer_delay(bh) && !buffer_unwritten(bh)) {
		/* Pre-set the failure code; it is cleared again below on success */
		err = -EIO;
		ll_rw_block(REQ_OP_READ, 0, 1, &bh);
		wait_on_buffer(bh);
		/* Uhhuh. Read error. Complain and punt. */
		if (!buffer_uptodate(bh))
			goto unlock;
	}

	zero_user(page, offset, length);
	mark_buffer_dirty(bh);
	err = 0;

unlock:
	unlock_page(page);
	put_page(page);
out:
	return err;
}
EXPORT_SYMBOL(block_truncate_page);
2958
2959
2960
2961
2962int block_write_full_page(struct page *page, get_block_t *get_block,
2963 struct writeback_control *wbc)
2964{
2965 struct inode * const inode = page->mapping->host;
2966 loff_t i_size = i_size_read(inode);
2967 const pgoff_t end_index = i_size >> PAGE_SHIFT;
2968 unsigned offset;
2969
2970
2971 if (page->index < end_index)
2972 return __block_write_full_page(inode, page, get_block, wbc,
2973 end_buffer_async_write);
2974
2975
2976 offset = i_size & (PAGE_SIZE-1);
2977 if (page->index >= end_index+1 || !offset) {
2978
2979
2980
2981
2982
2983 do_invalidatepage(page, 0, PAGE_SIZE);
2984 unlock_page(page);
2985 return 0;
2986 }
2987
2988
2989
2990
2991
2992
2993
2994
2995 zero_user_segment(page, offset, PAGE_SIZE);
2996 return __block_write_full_page(inode, page, get_block, wbc,
2997 end_buffer_async_write);
2998}
2999EXPORT_SYMBOL(block_write_full_page);
3000
3001sector_t generic_block_bmap(struct address_space *mapping, sector_t block,
3002 get_block_t *get_block)
3003{
3004 struct inode *inode = mapping->host;
3005 struct buffer_head tmp = {
3006 .b_size = i_blocksize(inode),
3007 };
3008
3009 get_block(inode, block, &tmp, 0);
3010 return tmp.b_blocknr;
3011}
3012EXPORT_SYMBOL(generic_block_bmap);
3013
3014static void end_bio_bh_io_sync(struct bio *bio)
3015{
3016 struct buffer_head *bh = bio->bi_private;
3017
3018 if (unlikely(bio_flagged(bio, BIO_QUIET)))
3019 set_bit(BH_Quiet, &bh->b_state);
3020
3021 bh->b_end_io(bh, !bio->bi_status);
3022 bio_put(bio);
3023}
3024
/*
 * Build a single-segment bio for @bh and submit it.
 *
 * @op, @op_flags: request operation and flags; REQ_META / REQ_PRIO are
 *                 added when the buffer carries the matching flag
 * @bh:            locked, mapped buffer with an end-io handler set; it
 *                 must be neither delayed nor unwritten (all of this is
 *                 enforced with BUG_ON)
 * @write_hint:    stored in the bio's bi_write_hint
 * @wbc:           optional writeback control used for bio initialisation
 *                 and cgroup accounting; may be NULL
 *
 * Always returns 0.  Completion is reported through bh->b_end_io via
 * end_bio_bh_io_sync().
 */
static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh,
			 enum rw_hint write_hint, struct writeback_control *wbc)
{
	struct bio *bio;

	BUG_ON(!buffer_locked(bh));
	BUG_ON(!buffer_mapped(bh));
	BUG_ON(!bh->b_end_io);
	BUG_ON(buffer_delay(bh));
	BUG_ON(buffer_unwritten(bh));

	/*
	 * Mark the buffer as having been submitted.  If it had been
	 * submitted before and this is a write, clear any previous write
	 * error.
	 */
	if (test_set_buffer_req(bh) && (op == REQ_OP_WRITE))
		clear_buffer_write_io_error(bh);

	bio = bio_alloc(GFP_NOIO, 1);

	fscrypt_set_bio_crypt_ctx_bh(bio, bh, GFP_NOIO);

	/* Convert the block number into 512-byte sectors */
	bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9);
	bio_set_dev(bio, bh->b_bdev);
	bio->bi_write_hint = write_hint;

	/* The whole buffer must fit into the bio as one segment */
	bio_add_page(bio, bh->b_page, bh->b_size, bh_offset(bh));
	BUG_ON(bio->bi_iter.bi_size != bh->b_size);

	bio->bi_end_io = end_bio_bh_io_sync;
	bio->bi_private = bh;

	if (buffer_meta(bh))
		op_flags |= REQ_META;
	if (buffer_prio(bh))
		op_flags |= REQ_PRIO;
	bio_set_op_attrs(bio, op, op_flags);

	/* NOTE(review): presumably guards against I/O past the end of the device -- confirm */
	guard_bio_eod(bio);

	if (wbc) {
		wbc_init_bio(wbc, bio);
		wbc_account_cgroup_owner(wbc, bh->b_page, bh->b_size);
	}

	submit_bio(bio);
	return 0;
}
3073
/*
 * Submit @bh for I/O without a write hint (0) and without a
 * writeback_control (NULL).  Returns the result of submit_bh_wbc().
 */
int submit_bh(int op, int op_flags, struct buffer_head *bh)
{
	return submit_bh_wbc(op, op_flags, bh, 0, NULL);
}
EXPORT_SYMBOL(submit_bh);
3079
3080
3081
3082
3083
3084
3085
3086
3087
3088
3089
3090
3091
3092
3093
3094
3095
3096
3097
3098
3099
3100
3101
3102
3103
3104
3105
3106void ll_rw_block(int op, int op_flags, int nr, struct buffer_head *bhs[])
3107{
3108 int i;
3109
3110 for (i = 0; i < nr; i++) {
3111 struct buffer_head *bh = bhs[i];
3112
3113 if (!trylock_buffer(bh))
3114 continue;
3115 if (op == WRITE) {
3116 if (test_clear_buffer_dirty(bh)) {
3117 bh->b_end_io = end_buffer_write_sync;
3118 get_bh(bh);
3119 submit_bh(op, op_flags, bh);
3120 continue;
3121 }
3122 } else {
3123 if (!buffer_uptodate(bh)) {
3124 bh->b_end_io = end_buffer_read_sync;
3125 get_bh(bh);
3126 submit_bh(op, op_flags, bh);
3127 continue;
3128 }
3129 }
3130 unlock_buffer(bh);
3131 }
3132}
3133EXPORT_SYMBOL(ll_rw_block);
3134
3135void write_dirty_buffer(struct buffer_head *bh, int op_flags)
3136{
3137 lock_buffer(bh);
3138 if (!test_clear_buffer_dirty(bh)) {
3139 unlock_buffer(bh);
3140 return;
3141 }
3142 bh->b_end_io = end_buffer_write_sync;
3143 get_bh(bh);
3144 submit_bh(REQ_OP_WRITE, op_flags, bh);
3145}
3146EXPORT_SYMBOL(write_dirty_buffer);
3147
3148
3149
3150
3151
3152
3153int __sync_dirty_buffer(struct buffer_head *bh, int op_flags)
3154{
3155 int ret = 0;
3156
3157 WARN_ON(atomic_read(&bh->b_count) < 1);
3158 lock_buffer(bh);
3159 if (test_clear_buffer_dirty(bh)) {
3160
3161
3162
3163
3164 if (!buffer_mapped(bh)) {
3165 unlock_buffer(bh);
3166 return -EIO;
3167 }
3168
3169 get_bh(bh);
3170 bh->b_end_io = end_buffer_write_sync;
3171 ret = submit_bh(REQ_OP_WRITE, op_flags, bh);
3172 wait_on_buffer(bh);
3173 if (!ret && !buffer_uptodate(bh))
3174 ret = -EIO;
3175 } else {
3176 unlock_buffer(bh);
3177 }
3178 return ret;
3179}
3180EXPORT_SYMBOL(__sync_dirty_buffer);
3181
/*
 * Synchronously write out @bh if it is dirty, using REQ_SYNC as the
 * request flag.  Returns the result of __sync_dirty_buffer().
 */
int sync_dirty_buffer(struct buffer_head *bh)
{
	return __sync_dirty_buffer(bh, REQ_SYNC);
}
EXPORT_SYMBOL(sync_dirty_buffer);
3187
3188
3189
3190
3191
3192
3193
3194
3195
3196
3197
3198
3199
3200
3201
3202
3203
3204
3205
3206
3207
3208static inline int buffer_busy(struct buffer_head *bh)
3209{
3210 return atomic_read(&bh->b_count) |
3211 (bh->b_state & ((1 << BH_Dirty) | (1 << BH_Lock)));
3212}
3213
3214static int
3215drop_buffers(struct page *page, struct buffer_head **buffers_to_free)
3216{
3217 struct buffer_head *head = page_buffers(page);
3218 struct buffer_head *bh;
3219
3220 bh = head;
3221 do {
3222 if (buffer_busy(bh))
3223 goto failed;
3224 bh = bh->b_this_page;
3225 } while (bh != head);
3226
3227 do {
3228 struct buffer_head *next = bh->b_this_page;
3229
3230 if (bh->b_assoc_map)
3231 __remove_assoc_queue(bh);
3232 bh = next;
3233 } while (bh != head);
3234 *buffers_to_free = head;
3235 detach_page_private(page);
3236 return 1;
3237failed:
3238 return 0;
3239}
3240
3241int try_to_free_buffers(struct page *page)
3242{
3243 struct address_space * const mapping = page->mapping;
3244 struct buffer_head *buffers_to_free = NULL;
3245 int ret = 0;
3246
3247 BUG_ON(!PageLocked(page));
3248 if (PageWriteback(page))
3249 return 0;
3250
3251 if (mapping == NULL) {
3252 ret = drop_buffers(page, &buffers_to_free);
3253 goto out;
3254 }
3255
3256 spin_lock(&mapping->private_lock);
3257 ret = drop_buffers(page, &buffers_to_free);
3258
3259
3260
3261
3262
3263
3264
3265
3266
3267
3268
3269
3270
3271
3272
3273 if (ret)
3274 cancel_dirty_page(page);
3275 spin_unlock(&mapping->private_lock);
3276out:
3277 if (buffers_to_free) {
3278 struct buffer_head *bh = buffers_to_free;
3279
3280 do {
3281 struct buffer_head *next = bh->b_this_page;
3282 free_buffer_head(bh);
3283 bh = next;
3284 } while (bh != buffers_to_free);
3285 }
3286 return ret;
3287}
3288EXPORT_SYMBOL(try_to_free_buffers);
3289
3290
3291
3292
3293
3294
3295
3296
3297SYSCALL_DEFINE2(bdflush, int, func, long, data)
3298{
3299 static int msg_count;
3300
3301 if (!capable(CAP_SYS_ADMIN))
3302 return -EPERM;
3303
3304 if (msg_count < 5) {
3305 msg_count++;
3306 printk(KERN_INFO
3307 "warning: process `%s' used the obsolete bdflush"
3308 " system call\n", current->comm);
3309 printk(KERN_INFO "Fix your initscripts?\n");
3310 }
3311
3312 if (func == 1)
3313 do_exit(0);
3314 return 0;
3315}
3316
3317
3318
3319
3320static struct kmem_cache *bh_cachep __read_mostly;
3321
3322
3323
3324
3325
3326static unsigned long max_buffer_heads;
3327
3328int buffer_heads_over_limit;
3329
3330struct bh_accounting {
3331 int nr;
3332 int ratelimit;
3333};
3334
3335static DEFINE_PER_CPU(struct bh_accounting, bh_accounting) = {0, 0};
3336
3337static void recalc_bh_state(void)
3338{
3339 int i;
3340 int tot = 0;
3341
3342 if (__this_cpu_inc_return(bh_accounting.ratelimit) - 1 < 4096)
3343 return;
3344 __this_cpu_write(bh_accounting.ratelimit, 0);
3345 for_each_online_cpu(i)
3346 tot += per_cpu(bh_accounting, i).nr;
3347 buffer_heads_over_limit = (tot > max_buffer_heads);
3348}
3349
3350struct buffer_head *alloc_buffer_head(gfp_t gfp_flags)
3351{
3352 struct buffer_head *ret = kmem_cache_zalloc(bh_cachep, gfp_flags);
3353 if (ret) {
3354 INIT_LIST_HEAD(&ret->b_assoc_buffers);
3355 spin_lock_init(&ret->b_uptodate_lock);
3356 preempt_disable();
3357 __this_cpu_inc(bh_accounting.nr);
3358 recalc_bh_state();
3359 preempt_enable();
3360 }
3361 return ret;
3362}
3363EXPORT_SYMBOL(alloc_buffer_head);
3364
3365void free_buffer_head(struct buffer_head *bh)
3366{
3367 BUG_ON(!list_empty(&bh->b_assoc_buffers));
3368 kmem_cache_free(bh_cachep, bh);
3369 preempt_disable();
3370 __this_cpu_dec(bh_accounting.nr);
3371 recalc_bh_state();
3372 preempt_enable();
3373}
3374EXPORT_SYMBOL(free_buffer_head);
3375
3376static int buffer_exit_cpu_dead(unsigned int cpu)
3377{
3378 int i;
3379 struct bh_lru *b = &per_cpu(bh_lrus, cpu);
3380
3381 for (i = 0; i < BH_LRU_SIZE; i++) {
3382 brelse(b->bhs[i]);
3383 b->bhs[i] = NULL;
3384 }
3385 this_cpu_add(bh_accounting.nr, per_cpu(bh_accounting, cpu).nr);
3386 per_cpu(bh_accounting, cpu).nr = 0;
3387 return 0;
3388}
3389
3390
3391
3392
3393
3394
3395
3396
/*
 * bh_uptodate_or_lock - test whether a buffer is uptodate, locking it if not
 * @bh: buffer to test
 *
 * Returns 1 when the buffer is uptodate; in that case it is left unlocked.
 * Returns 0 when the buffer is still not uptodate after taking the lock;
 * in that case the buffer is returned locked.
 */
int bh_uptodate_or_lock(struct buffer_head *bh)
{
	/* Fast path: no lock needed when the data is already valid. */
	if (buffer_uptodate(bh))
		return 1;

	lock_buffer(bh);

	/* Recheck under the lock; keep the lock if it is still stale. */
	if (!buffer_uptodate(bh))
		return 0;

	unlock_buffer(bh);
	return 1;
}
EXPORT_SYMBOL(bh_uptodate_or_lock);
3408
3409
3410
3411
3412
3413
3414
3415int bh_submit_read(struct buffer_head *bh)
3416{
3417 BUG_ON(!buffer_locked(bh));
3418
3419 if (buffer_uptodate(bh)) {
3420 unlock_buffer(bh);
3421 return 0;
3422 }
3423
3424 get_bh(bh);
3425 bh->b_end_io = end_buffer_read_sync;
3426 submit_bh(REQ_OP_READ, 0, bh);
3427 wait_on_buffer(bh);
3428 if (buffer_uptodate(bh))
3429 return 0;
3430 return -EIO;
3431}
3432EXPORT_SYMBOL(bh_submit_read);
3433
3434void __init buffer_init(void)
3435{
3436 unsigned long nrpages;
3437 int ret;
3438
3439 bh_cachep = kmem_cache_create("buffer_head",
3440 sizeof(struct buffer_head), 0,
3441 (SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|
3442 SLAB_MEM_SPREAD),
3443 NULL);
3444
3445
3446
3447
3448 nrpages = (nr_free_buffer_pages() * 10) / 100;
3449 max_buffer_heads = nrpages * (PAGE_SIZE / sizeof(struct buffer_head));
3450 ret = cpuhp_setup_state_nocalls(CPUHP_FS_BUFF_DEAD, "fs/buffer:dead",
3451 NULL, buffer_exit_cpu_dead);
3452 WARN_ON(ret < 0);
3453}
3454