/*
 *  linux/fs/buffer.c
 *
 *  Copyright (C) 1991, 1992, 2002  Linus Torvalds
 */

21#include <linux/kernel.h>
22#include <linux/syscalls.h>
23#include <linux/fs.h>
24#include <linux/iomap.h>
25#include <linux/mm.h>
26#include <linux/percpu.h>
27#include <linux/slab.h>
28#include <linux/capability.h>
29#include <linux/blkdev.h>
30#include <linux/file.h>
31#include <linux/quotaops.h>
32#include <linux/highmem.h>
33#include <linux/export.h>
34#include <linux/backing-dev.h>
35#include <linux/writeback.h>
36#include <linux/hash.h>
37#include <linux/suspend.h>
38#include <linux/buffer_head.h>
39#include <linux/task_io_accounting_ops.h>
40#include <linux/bio.h>
41#include <linux/notifier.h>
42#include <linux/cpu.h>
43#include <linux/bitops.h>
44#include <linux/mpage.h>
45#include <linux/bit_spinlock.h>
46#include <trace/events/block.h>
47
48static int fsync_buffers_list(spinlock_t *lock, struct list_head *list);
49static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh,
50 unsigned long bio_flags,
51 struct writeback_control *wbc);
52
53#define BH_ENTRY(list) list_entry((list), struct buffer_head, b_assoc_buffers)
54
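/* Install the end_io completion handler and private data carried by a buffer_head. */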
55void init_buffer(struct buffer_head *bh, bh_end_io_t *handler, void *private)
56{
57 bh->b_end_io = handler;
58 bh->b_private = private;
59}
60EXPORT_SYMBOL(init_buffer);
61
62inline void touch_buffer(struct buffer_head *bh)
63{
64 trace_block_touch_buffer(bh);
65 mark_page_accessed(bh->b_page);
66}
67EXPORT_SYMBOL(touch_buffer);
68
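/*
 * Slow path of lock_buffer(): sleep uninterruptibly until the BH_Lock bit
 * can be taken.
 */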
69void __lock_buffer(struct buffer_head *bh)
70{
71 wait_on_bit_lock_io(&bh->b_state, BH_Lock, TASK_UNINTERRUPTIBLE);
72}
73EXPORT_SYMBOL(__lock_buffer);
74
75void unlock_buffer(struct buffer_head *bh)
76{
77 clear_bit_unlock(BH_Lock, &bh->b_state);
78 smp_mb__after_atomic();
79 wake_up_bit(&bh->b_state, BH_Lock);
80}
81EXPORT_SYMBOL(unlock_buffer);
82
/*
 * Report whether the page has dirty or writeback buffers.  If all the
 * buffers are unlocked and clean then the PageDirty information is stale;
 * if any of the buffers are locked, they are assumed to be locked for IO.
 */
88void buffer_check_dirty_writeback(struct page *page,
89 bool *dirty, bool *writeback)
90{
91 struct buffer_head *head, *bh;
92 *dirty = false;
93 *writeback = false;
94
95 BUG_ON(!PageLocked(page));
96
97 if (!page_has_buffers(page))
98 return;
99
100 if (PageWriteback(page))
101 *writeback = true;
102
103 head = page_buffers(page);
104 bh = head;
105 do {
106 if (buffer_locked(bh))
107 *writeback = true;
108
109 if (buffer_dirty(bh))
110 *dirty = true;
111
112 bh = bh->b_this_page;
113 } while (bh != head);
114}
115EXPORT_SYMBOL(buffer_check_dirty_writeback);
116
/*
 * Block until a buffer comes unlocked.  This doesn't stop it
 * from becoming locked again - you have to lock it yourself
 * if you want to preserve its state.
 */
122void __wait_on_buffer(struct buffer_head * bh)
123{
124 wait_on_bit_io(&bh->b_state, BH_Lock, TASK_UNINTERRUPTIBLE);
125}
126EXPORT_SYMBOL(__wait_on_buffer);
127
128static void
129__clear_page_buffers(struct page *page)
130{
131 ClearPagePrivate(page);
132 set_page_private(page, 0);
133 put_page(page);
134}
135
136static void buffer_io_error(struct buffer_head *bh, char *msg)
137{
138 if (!test_bit(BH_Quiet, &bh->b_state))
139 printk_ratelimited(KERN_ERR
140 "Buffer I/O error on dev %pg, logical block %llu%s\n",
141 bh->b_bdev, (unsigned long long)bh->b_blocknr, msg);
142}
143
/*
 * End-of-IO handler helper function which does not touch the bh after
 * unlocking it.
 * Note: unlock_buffer() sort-of does touch the bh after unlocking it, but
 * a race there is benign: unlock_buffer() only uses the bh's address for
 * the wakeup, so it doesn't actually touch the bh itself.
 */
152static void __end_buffer_read_notouch(struct buffer_head *bh, int uptodate)
153{
154 if (uptodate) {
155 set_buffer_uptodate(bh);
156 } else {
157
158 clear_buffer_uptodate(bh);
159 }
160 unlock_buffer(bh);
161}
162
/*
 * Default synchronous end-of-IO handler.  Just mark it up-to-date and
 * unlock the buffer.  This is what ll_rw_block uses too.
 */
167void end_buffer_read_sync(struct buffer_head *bh, int uptodate)
168{
169 __end_buffer_read_notouch(bh, uptodate);
170 put_bh(bh);
171}
172EXPORT_SYMBOL(end_buffer_read_sync);
173
174void end_buffer_write_sync(struct buffer_head *bh, int uptodate)
175{
176 if (uptodate) {
177 set_buffer_uptodate(bh);
178 } else {
179 buffer_io_error(bh, ", lost sync page write");
180 set_buffer_write_io_error(bh);
181 clear_buffer_uptodate(bh);
182 }
183 unlock_buffer(bh);
184 put_bh(bh);
185}
186EXPORT_SYMBOL(end_buffer_write_sync);
187
/*
 * Various filesystems appear to want __find_get_block to be non-blocking.
 * But it's the page lock which protects the buffers.  To get around this,
 * we get exclusion from try_to_free_buffers with the blockdev mapping's
 * private_lock.
 *
 * Hack idea: for the blockdev mapping, private_lock contention may be
 * quite high.  This code could TryLock the page, and if that succeeds,
 * there is no need to take private_lock.
 */
199static struct buffer_head *
200__find_get_block_slow(struct block_device *bdev, sector_t block)
201{
202 struct inode *bd_inode = bdev->bd_inode;
203 struct address_space *bd_mapping = bd_inode->i_mapping;
204 struct buffer_head *ret = NULL;
205 pgoff_t index;
206 struct buffer_head *bh;
207 struct buffer_head *head;
208 struct page *page;
209 int all_mapped = 1;
210
211 index = block >> (PAGE_SHIFT - bd_inode->i_blkbits);
212 page = find_get_page_flags(bd_mapping, index, FGP_ACCESSED);
213 if (!page)
214 goto out;
215
216 spin_lock(&bd_mapping->private_lock);
217 if (!page_has_buffers(page))
218 goto out_unlock;
219 head = page_buffers(page);
220 bh = head;
221 do {
222 if (!buffer_mapped(bh))
223 all_mapped = 0;
224 else if (bh->b_blocknr == block) {
225 ret = bh;
226 get_bh(bh);
227 goto out_unlock;
228 }
229 bh = bh->b_this_page;
230 } while (bh != head);
231
232
233
234
235
236
237 if (all_mapped) {
238 printk("__find_get_block_slow() failed. "
239 "block=%llu, b_blocknr=%llu\n",
240 (unsigned long long)block,
241 (unsigned long long)bh->b_blocknr);
242 printk("b_state=0x%08lx, b_size=%zu\n",
243 bh->b_state, bh->b_size);
244 printk("device %pg blocksize: %d\n", bdev,
245 1 << bd_inode->i_blkbits);
246 }
247out_unlock:
248 spin_unlock(&bd_mapping->private_lock);
249 put_page(page);
250out:
251 return ret;
252}
253
/*
 * Kick the writeback threads then try to free up some ZONE_NORMAL memory.
 */
257static void free_more_memory(void)
258{
259 struct zoneref *z;
260 int nid;
261
262 wakeup_flusher_threads(1024, WB_REASON_FREE_MORE_MEM);
263 yield();
264
265 for_each_online_node(nid) {
266
267 z = first_zones_zonelist(node_zonelist(nid, GFP_NOFS),
268 gfp_zone(GFP_NOFS), NULL);
269 if (z->zone)
270 try_to_free_pages(node_zonelist(nid, GFP_NOFS), 0,
271 GFP_NOFS, NULL);
272 }
273}
274
/*
 * I/O completion handler for block_read_full_page() - pages
 * which come unlocked at the end of I/O.
 */
279static void end_buffer_async_read(struct buffer_head *bh, int uptodate)
280{
281 unsigned long flags;
282 struct buffer_head *first;
283 struct buffer_head *tmp;
284 struct page *page;
285 int page_uptodate = 1;
286
287 BUG_ON(!buffer_async_read(bh));
288
289 page = bh->b_page;
290 if (uptodate) {
291 set_buffer_uptodate(bh);
292 } else {
293 clear_buffer_uptodate(bh);
294 buffer_io_error(bh, ", async page read");
295 SetPageError(page);
296 }
297
 /*
  * Be _very_ careful from here on.  Bad things can happen if
  * two buffer heads end IO at almost the same time and both
  * decide that the page is now completely done.
  */
303 first = page_buffers(page);
304 local_irq_save(flags);
305 bit_spin_lock(BH_Uptodate_Lock, &first->b_state);
306 clear_buffer_async_read(bh);
307 unlock_buffer(bh);
308 tmp = bh;
309 do {
310 if (!buffer_uptodate(tmp))
311 page_uptodate = 0;
312 if (buffer_async_read(tmp)) {
313 BUG_ON(!buffer_locked(tmp));
314 goto still_busy;
315 }
316 tmp = tmp->b_this_page;
317 } while (tmp != bh);
318 bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
319 local_irq_restore(flags);
320
321
322
323
324
325 if (page_uptodate && !PageError(page))
326 SetPageUptodate(page);
327 unlock_page(page);
328 return;
329
330still_busy:
331 bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
332 local_irq_restore(flags);
333 return;
334}
335
/*
 * Completion handler for block_write_full_page() - pages which are unlocked
 * during I/O, and which have PageWriteback cleared upon I/O completion.
 */
340void end_buffer_async_write(struct buffer_head *bh, int uptodate)
341{
342 unsigned long flags;
343 struct buffer_head *first;
344 struct buffer_head *tmp;
345 struct page *page;
346
347 BUG_ON(!buffer_async_write(bh));
348
349 page = bh->b_page;
350 if (uptodate) {
351 set_buffer_uptodate(bh);
352 } else {
353 buffer_io_error(bh, ", lost async page write");
354 mapping_set_error(page->mapping, -EIO);
355 set_buffer_write_io_error(bh);
356 clear_buffer_uptodate(bh);
357 SetPageError(page);
358 }
359
360 first = page_buffers(page);
361 local_irq_save(flags);
362 bit_spin_lock(BH_Uptodate_Lock, &first->b_state);
363
364 clear_buffer_async_write(bh);
365 unlock_buffer(bh);
366 tmp = bh->b_this_page;
367 while (tmp != bh) {
368 if (buffer_async_write(tmp)) {
369 BUG_ON(!buffer_locked(tmp));
370 goto still_busy;
371 }
372 tmp = tmp->b_this_page;
373 }
374 bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
375 local_irq_restore(flags);
376 end_page_writeback(page);
377 return;
378
379still_busy:
380 bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
381 local_irq_restore(flags);
382 return;
383}
384EXPORT_SYMBOL(end_buffer_async_write);
385
/*
 * If a page's buffers are under async readin (end_buffer_async_read
 * completion) then there is a possibility that another thread of
 * control could lock one of the buffers after it has completed
 * but while some of the other buffers have not completed.  This
 * locked buffer would confuse end_buffer_async_read() into not unlocking
 * the page.  So the absence of BH_Async_Read tells end_buffer_async_read()
 * that this buffer is not under async I/O.
 *
 * The page comes unlocked when it has no locked buffer_async buffers
 * left.
 *
 * PageLocked prevents anyone starting new async I/O reads against any of
 * the buffers.
 *
 * PageWriteback is used to prevent simultaneous writeout of the same
 * page.
 *
 * PageLocked prevents anyone from starting writeback of a page which is
 * under read I/O (PageWriteback is only ever set against a locked page).
 */
407static void mark_buffer_async_read(struct buffer_head *bh)
408{
409 bh->b_end_io = end_buffer_async_read;
410 set_buffer_async_read(bh);
411}
412
413static void mark_buffer_async_write_endio(struct buffer_head *bh,
414 bh_end_io_t *handler)
415{
416 bh->b_end_io = handler;
417 set_buffer_async_write(bh);
418}
419
420void mark_buffer_async_write(struct buffer_head *bh)
421{
422 mark_buffer_async_write_endio(bh, end_buffer_async_write);
423}
424EXPORT_SYMBOL(mark_buffer_async_write);
425
/*
 * Buffer-head association with inodes ("associated buffers"):
 *
 * Some filesystems need certain blockdev buffers (for example indirect
 * blocks) to be written out and waited upon before fsync() of a regular
 * file returns.  Such buffers are attached to the file's
 * mapping->private_list, and mapping->private_data points at the blockdev
 * address_space which actually backs them.
 *
 * mark_buffer_dirty_inode(), inode_has_buffers(), sync_mapping_buffers(),
 * invalidate_inode_buffers() and remove_inode_buffers() manage that list.
 *
 * Locking: the list is protected by the private_lock of the *blockdev*
 * mapping which backs the buffers, because try_to_free_buffers() operates
 * against the blockdev mapping when it strips buffers from a page.
 */

/*
 * The buffer's backing address_space's private_lock must be held.
 */
479static void __remove_assoc_queue(struct buffer_head *bh)
480{
481 list_del_init(&bh->b_assoc_buffers);
482 WARN_ON(!bh->b_assoc_map);
483 if (buffer_write_io_error(bh))
484 set_bit(AS_EIO, &bh->b_assoc_map->flags);
485 bh->b_assoc_map = NULL;
486}
487
488int inode_has_buffers(struct inode *inode)
489{
490 return !list_empty(&inode->i_data.private_list);
491}
492
/*
 * osync is designed to support O_SYNC io.  It waits synchronously for
 * all already-submitted IO to complete, but does not queue any new
 * writes to the disk.
 *
 * To do O_SYNC writes, just queue the buffer writes with ll_rw_block as
 * you dirty the buffers, and then use osync_buffers_list to wait for
 * completion.  Any other dirty buffers which are not yet queued for
 * write will not be flushed to disk by the osync.
 */
503static int osync_buffers_list(spinlock_t *lock, struct list_head *list)
504{
505 struct buffer_head *bh;
506 struct list_head *p;
507 int err = 0;
508
509 spin_lock(lock);
510repeat:
511 list_for_each_prev(p, list) {
512 bh = BH_ENTRY(p);
513 if (buffer_locked(bh)) {
514 get_bh(bh);
515 spin_unlock(lock);
516 wait_on_buffer(bh);
517 if (!buffer_uptodate(bh))
518 err = -EIO;
519 brelse(bh);
520 spin_lock(lock);
521 goto repeat;
522 }
523 }
524 spin_unlock(lock);
525 return err;
526}
527
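/*
 * Workers used by emergency_thaw_all() to forcibly thaw every frozen
 * filesystem (for example from the SysRq emergency-thaw handler).
 */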
528static void do_thaw_one(struct super_block *sb, void *unused)
529{
530 while (sb->s_bdev && !thaw_bdev(sb->s_bdev, sb))
531 printk(KERN_WARNING "Emergency Thaw on %pg\n", sb->s_bdev);
532}
533
534static void do_thaw_all(struct work_struct *work)
535{
536 iterate_supers(do_thaw_one, NULL);
537 kfree(work);
538 printk(KERN_WARNING "Emergency Thaw complete\n");
539}
540
/**
 * emergency_thaw_all -- forcibly thaw every frozen filesystem
 *
 * Used for emergency unfreeze of all filesystems via SysRq.
 */
546void emergency_thaw_all(void)
547{
548 struct work_struct *work;
549
550 work = kmalloc(sizeof(*work), GFP_ATOMIC);
551 if (work) {
552 INIT_WORK(work, do_thaw_all);
553 schedule_work(work);
554 }
555}
556
/**
 * sync_mapping_buffers - write out & wait upon a mapping's "associated" buffers
 * @mapping: the mapping which wants those buffers written
 *
 * Starts I/O against the buffers at mapping->private_list, and waits upon
 * them.
 *
 * Basically, this is a convenience function for fsync().
 * @mapping is a file or directory which needs those buffers to be written for
 * a successful fsync().
 */
568int sync_mapping_buffers(struct address_space *mapping)
569{
570 struct address_space *buffer_mapping = mapping->private_data;
571
572 if (buffer_mapping == NULL || list_empty(&mapping->private_list))
573 return 0;
574
575 return fsync_buffers_list(&buffer_mapping->private_lock,
576 &mapping->private_list);
577}
578EXPORT_SYMBOL(sync_mapping_buffers);
579
/*
 * Called when we've recently written block `bblock', and it is known that
 * `bblock' was for a buffer_boundary() buffer.  This means that the block at
 * `bblock + 1' is probably a dirty indirect block.  Hunt it down and, if it's
 * dirty, schedule it for IO.  So that indirects merge nicely with their data.
 */
586void write_boundary_block(struct block_device *bdev,
587 sector_t bblock, unsigned blocksize)
588{
589 struct buffer_head *bh = __find_get_block(bdev, bblock + 1, blocksize);
590 if (bh) {
591 if (buffer_dirty(bh))
592 ll_rw_block(REQ_OP_WRITE, 0, 1, &bh);
593 put_bh(bh);
594 }
595}
596
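/*
 * Mark the buffer dirty and, in addition, attach it to the owning inode's
 * private_list so that sync_mapping_buffers() will write it out on fsync.
 * The first call also ties mapping->private_data to the blockdev mapping
 * that actually backs the buffers.
 */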
597void mark_buffer_dirty_inode(struct buffer_head *bh, struct inode *inode)
598{
599 struct address_space *mapping = inode->i_mapping;
600 struct address_space *buffer_mapping = bh->b_page->mapping;
601
602 mark_buffer_dirty(bh);
603 if (!mapping->private_data) {
604 mapping->private_data = buffer_mapping;
605 } else {
606 BUG_ON(mapping->private_data != buffer_mapping);
607 }
608 if (!bh->b_assoc_map) {
609 spin_lock(&buffer_mapping->private_lock);
610 list_move_tail(&bh->b_assoc_buffers,
611 &mapping->private_list);
612 bh->b_assoc_map = mapping;
613 spin_unlock(&buffer_mapping->private_lock);
614 }
615}
616EXPORT_SYMBOL(mark_buffer_dirty_inode);
617
/*
 * Tag the page dirty in its mapping's radix tree and do the dirty-page
 * accounting.
 *
 * If warn is true, emit a warning if the page is not uptodate (and has not
 * been truncated).
 *
 * The caller must hold lock_page_memcg() and is responsible for marking
 * the inode dirty afterwards.
 */
627static void __set_page_dirty(struct page *page, struct address_space *mapping,
628 int warn)
629{
630 unsigned long flags;
631
632 spin_lock_irqsave(&mapping->tree_lock, flags);
633 if (page->mapping) {
634 WARN_ON_ONCE(warn && !PageUptodate(page));
635 account_page_dirtied(page, mapping);
636 radix_tree_tag_set(&mapping->page_tree,
637 page_index(page), PAGECACHE_TAG_DIRTY);
638 }
639 spin_unlock_irqrestore(&mapping->tree_lock, flags);
640}
641
/*
 * Add a page to the dirty page list.
 *
 * It is a sad fact of life that this function is called from several places
 * deeply under spinlocking.  It may not sleep.
 *
 * If the page has buffers, they are all marked dirty first, to preserve
 * dirty-state coherency between the page and its buffers.
 *
 * The buffers are dirtied before the page is dirtied.  There's a small race
 * window in which a writepage caller may see the page cleanness but not the
 * buffer dirtiness.  That's fine.  If this code were to set the page dirty
 * before the buffers, a concurrent writepage caller could clear the page
 * dirty bit, see a bunch of clean buffers and we'd end up with dirty buffers
 * attached to a clean page.
 *
 * We use private_lock to lock against try_to_free_buffers while using the
 * page's buffer list, and to prevent clean buffers from being added to the
 * page after it was set dirty.
 */
667int __set_page_dirty_buffers(struct page *page)
668{
669 int newly_dirty;
670 struct address_space *mapping = page_mapping(page);
671
672 if (unlikely(!mapping))
673 return !TestSetPageDirty(page);
674
675 spin_lock(&mapping->private_lock);
676 if (page_has_buffers(page)) {
677 struct buffer_head *head = page_buffers(page);
678 struct buffer_head *bh = head;
679
680 do {
681 set_buffer_dirty(bh);
682 bh = bh->b_this_page;
683 } while (bh != head);
684 }
685
686
687
688
689 lock_page_memcg(page);
690 newly_dirty = !TestSetPageDirty(page);
691 spin_unlock(&mapping->private_lock);
692
693 if (newly_dirty)
694 __set_page_dirty(page, mapping, 1);
695
696 unlock_page_memcg(page);
697
698 if (newly_dirty)
699 __mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
700
701 return newly_dirty;
702}
703EXPORT_SYMBOL(__set_page_dirty_buffers);
704
/*
 * Write out and wait upon a list of buffers.
 *
 * We have conflicting pressures: we want to make sure that all
 * initially dirty buffers get waited upon, but that any subsequently
 * dirtied buffers don't.  After all, we don't want fsync to last
 * forever if somebody is actively writing to the file.
 *
 * Do this in two main stages: first we copy dirty buffers to a
 * temporary inode list, queueing the writes as we go.  Then we clean
 * up, waiting for those writes to complete.  Buffers dirtied again
 * while we wait are put back on the original list and are picked up by
 * the final osync_buffers_list() pass.
 */
724static int fsync_buffers_list(spinlock_t *lock, struct list_head *list)
725{
726 struct buffer_head *bh;
727 struct list_head tmp;
728 struct address_space *mapping;
729 int err = 0, err2;
730 struct blk_plug plug;
731
732 INIT_LIST_HEAD(&tmp);
733 blk_start_plug(&plug);
734
735 spin_lock(lock);
736 while (!list_empty(list)) {
737 bh = BH_ENTRY(list->next);
738 mapping = bh->b_assoc_map;
739 __remove_assoc_queue(bh);
740
741
742 smp_mb();
743 if (buffer_dirty(bh) || buffer_locked(bh)) {
744 list_add(&bh->b_assoc_buffers, &tmp);
745 bh->b_assoc_map = mapping;
746 if (buffer_dirty(bh)) {
747 get_bh(bh);
748 spin_unlock(lock);
749
750
751
752
753
754
755
756 write_dirty_buffer(bh, WRITE_SYNC);
757
758
759
760
761
762
763
764 brelse(bh);
765 spin_lock(lock);
766 }
767 }
768 }
769
770 spin_unlock(lock);
771 blk_finish_plug(&plug);
772 spin_lock(lock);
773
774 while (!list_empty(&tmp)) {
775 bh = BH_ENTRY(tmp.prev);
776 get_bh(bh);
777 mapping = bh->b_assoc_map;
778 __remove_assoc_queue(bh);
779
780
781 smp_mb();
782 if (buffer_dirty(bh)) {
783 list_add(&bh->b_assoc_buffers,
784 &mapping->private_list);
785 bh->b_assoc_map = mapping;
786 }
787 spin_unlock(lock);
788 wait_on_buffer(bh);
789 if (!buffer_uptodate(bh))
790 err = -EIO;
791 brelse(bh);
792 spin_lock(lock);
793 }
794
795 spin_unlock(lock);
796 err2 = osync_buffers_list(lock, list);
797 if (err)
798 return err;
799 else
800 return err2;
801}
802
/*
 * Invalidate any and all dirty buffers on a given inode.  We are
 * probably unmounting the fs, but that doesn't mean we have already
 * done a sync().  Just drop the buffers from the inode list.
 *
 * NOTE: we take the inode's blockdev's mapping's private_lock, which
 * assumes that all the buffers are against the blockdev.
 */
812void invalidate_inode_buffers(struct inode *inode)
813{
814 if (inode_has_buffers(inode)) {
815 struct address_space *mapping = &inode->i_data;
816 struct list_head *list = &mapping->private_list;
817 struct address_space *buffer_mapping = mapping->private_data;
818
819 spin_lock(&buffer_mapping->private_lock);
820 while (!list_empty(list))
821 __remove_assoc_queue(BH_ENTRY(list->next));
822 spin_unlock(&buffer_mapping->private_lock);
823 }
824}
825EXPORT_SYMBOL(invalidate_inode_buffers);
826
/*
 * Remove any clean buffers from the inode's buffer list.  This is called
 * when we're trying to free the inode itself.  Those buffers can pin it.
 *
 * Returns true if all buffers were removed.
 */
833int remove_inode_buffers(struct inode *inode)
834{
835 int ret = 1;
836
837 if (inode_has_buffers(inode)) {
838 struct address_space *mapping = &inode->i_data;
839 struct list_head *list = &mapping->private_list;
840 struct address_space *buffer_mapping = mapping->private_data;
841
842 spin_lock(&buffer_mapping->private_lock);
843 while (!list_empty(list)) {
844 struct buffer_head *bh = BH_ENTRY(list->next);
845 if (buffer_dirty(bh)) {
846 ret = 0;
847 break;
848 }
849 __remove_assoc_queue(bh);
850 }
851 spin_unlock(&buffer_mapping->private_lock);
852 }
853 return ret;
854}
855
/*
 * Create the appropriate buffers when given a page for data area and
 * the size of each buffer.  Use the bh->b_this_page linked list to
 * follow the buffers created.  Return NULL if unable to create more
 * buffers.
 *
 * If @retry is set the allocation may not fail: we free some memory
 * and keep retrying.  This is used for async IO (paging, swapping)
 * as opposed to ordinary buffer allocations.
 */
865struct buffer_head *alloc_page_buffers(struct page *page, unsigned long size,
866 int retry)
867{
868 struct buffer_head *bh, *head;
869 long offset;
870
871try_again:
872 head = NULL;
873 offset = PAGE_SIZE;
874 while ((offset -= size) >= 0) {
875 bh = alloc_buffer_head(GFP_NOFS);
876 if (!bh)
877 goto no_grow;
878
879 bh->b_this_page = head;
880 bh->b_blocknr = -1;
881 head = bh;
882
883 bh->b_size = size;
884
885
886 set_bh_page(bh, page, offset);
887 }
888 return head;
889
890
891
892no_grow:
893 if (head) {
894 do {
895 bh = head;
896 head = head->b_this_page;
897 free_buffer_head(bh);
898 } while (head);
899 }
900
901
902
903
904
905
906
907 if (!retry)
908 return NULL;
909
910
911
912
913
914
915
916 free_more_memory();
917 goto try_again;
918}
919EXPORT_SYMBOL_GPL(alloc_page_buffers);
920
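/*
 * Close the singly-linked chain built by alloc_page_buffers() into a ring
 * and attach it to the page as its private buffer list.
 */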
921static inline void
922link_dev_buffers(struct page *page, struct buffer_head *head)
923{
924 struct buffer_head *bh, *tail;
925
926 bh = head;
927 do {
928 tail = bh;
929 bh = bh->b_this_page;
930 } while (bh);
931 tail->b_this_page = head;
932 attach_page_buffers(page, head);
933}
934
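/*
 * Number of blocks of the given size that fit in the device
 * (i_size >> blkbits); init_page_buffers() only marks buffers below this
 * limit as mapped.  Returns ~0 while the device size is not yet known.
 */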
935static sector_t blkdev_max_block(struct block_device *bdev, unsigned int size)
936{
937 sector_t retval = ~((sector_t)0);
938 loff_t sz = i_size_read(bdev->bd_inode);
939
940 if (sz) {
941 unsigned int sizebits = blksize_bits(size);
942 retval = (sz >> sizebits);
943 }
944 return retval;
945}
946
947
948
949
950static sector_t
951init_page_buffers(struct page *page, struct block_device *bdev,
952 sector_t block, int size)
953{
954 struct buffer_head *head = page_buffers(page);
955 struct buffer_head *bh = head;
956 int uptodate = PageUptodate(page);
957 sector_t end_block = blkdev_max_block(I_BDEV(bdev->bd_inode), size);
958
959 do {
960 if (!buffer_mapped(bh)) {
961 init_buffer(bh, NULL, NULL);
962 bh->b_bdev = bdev;
963 bh->b_blocknr = block;
964 if (uptodate)
965 set_buffer_uptodate(bh);
966 if (block < end_block)
967 set_buffer_mapped(bh);
968 }
969 block++;
970 bh = bh->b_this_page;
971 } while (bh != head);
972
973
974
975
976 return end_block;
977}
978
/*
 * Create the page-cache page that contains the requested block.
 *
 * This is used purely for blockdev mappings.
 */
984static int
985grow_dev_page(struct block_device *bdev, sector_t block,
986 pgoff_t index, int size, int sizebits, gfp_t gfp)
987{
988 struct inode *inode = bdev->bd_inode;
989 struct page *page;
990 struct buffer_head *bh;
991 sector_t end_block;
992 int ret = 0;
993 gfp_t gfp_mask;
994
995 gfp_mask = mapping_gfp_constraint(inode->i_mapping, ~__GFP_FS) | gfp;
996
 /*
  * XXX: __getblk_slow() can not really deal with failure and
  * will endlessly loop on improvised global reclaim.  Prefer
  * looping in the allocator rather than here, at least that
  * code knows what it's doing.
  */
1003 gfp_mask |= __GFP_NOFAIL;
1004
1005 page = find_or_create_page(inode->i_mapping, index, gfp_mask);
1006 if (!page)
1007 return ret;
1008
1009 BUG_ON(!PageLocked(page));
1010
1011 if (page_has_buffers(page)) {
1012 bh = page_buffers(page);
1013 if (bh->b_size == size) {
1014 end_block = init_page_buffers(page, bdev,
1015 (sector_t)index << sizebits,
1016 size);
1017 goto done;
1018 }
1019 if (!try_to_free_buffers(page))
1020 goto failed;
1021 }
1022
1023
1024
1025
1026 bh = alloc_page_buffers(page, size, 0);
1027 if (!bh)
1028 goto failed;
1029
1030
1031
1032
1033
1034
1035 spin_lock(&inode->i_mapping->private_lock);
1036 link_dev_buffers(page, bh);
1037 end_block = init_page_buffers(page, bdev, (sector_t)index << sizebits,
1038 size);
1039 spin_unlock(&inode->i_mapping->private_lock);
1040done:
1041 ret = (block < end_block) ? 1 : -ENXIO;
1042failed:
1043 unlock_page(page);
1044 put_page(page);
1045 return ret;
1046}
1047
/*
 * Create buffers for the specified block device block's page.  If
 * that page was dirty, the buffers are set dirty also.
 */
1052static int
1053grow_buffers(struct block_device *bdev, sector_t block, int size, gfp_t gfp)
1054{
1055 pgoff_t index;
1056 int sizebits;
1057
1058 sizebits = -1;
1059 do {
1060 sizebits++;
1061 } while ((size << sizebits) < PAGE_SIZE);
1062
1063 index = block >> sizebits;
1064
1065
1066
1067
1068
1069 if (unlikely(index != block >> sizebits)) {
1070 printk(KERN_ERR "%s: requested out-of-range block %llu for "
1071 "device %pg\n",
1072 __func__, (unsigned long long)block,
1073 bdev);
1074 return -EIO;
1075 }
1076
1077
1078 return grow_dev_page(bdev, block, index, size, sizebits, gfp);
1079}
1080
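/*
 * Slow path of __getblk_gfp(): validate the requested block size, then keep
 * trying to find or create the buffer, freeing memory between attempts if
 * grow_buffers() cannot make progress.
 */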
1081static struct buffer_head *
1082__getblk_slow(struct block_device *bdev, sector_t block,
1083 unsigned size, gfp_t gfp)
1084{
1085
1086 if (unlikely(size & (bdev_logical_block_size(bdev)-1) ||
1087 (size < 512 || size > PAGE_SIZE))) {
1088 printk(KERN_ERR "getblk(): invalid block size %d requested\n",
1089 size);
1090 printk(KERN_ERR "logical block size: %d\n",
1091 bdev_logical_block_size(bdev));
1092
1093 dump_stack();
1094 return NULL;
1095 }
1096
1097 for (;;) {
1098 struct buffer_head *bh;
1099 int ret;
1100
1101 bh = __find_get_block(bdev, block, size);
1102 if (bh)
1103 return bh;
1104
1105 ret = grow_buffers(bdev, block, size, gfp);
1106 if (ret < 0)
1107 return NULL;
1108 if (ret == 0)
1109 free_more_memory();
1110 }
1111}
1112
/*
 * The relationship between dirty buffers and dirty pages:
 *
 * Whenever a page has any dirty buffers, the page's dirty bit is set, and
 * the page is tagged dirty in its radix tree.
 *
 * At all times, the dirtiness of the buffers represents the dirtiness of
 * subsections of the page.  If the page has buffers, the page dirty bit is
 * merely a hint about the true dirty state.
 *
 * When a page is set dirty in its entirety, all its buffers become dirty
 * (if the page has buffers).
 *
 * When a buffer is marked dirty, its page is dirtied, but the page's other
 * buffers are not.
 *
 * Also: when blockdev buffers are explicitly read with bread(), they
 * individually become uptodate.  But their backing page remains not
 * uptodate - even if all of its buffers are uptodate.
 */

/**
 * mark_buffer_dirty - mark a buffer_head as needing writeout
 * @bh: the buffer_head to mark dirty
 *
 * mark_buffer_dirty() will set the dirty bit against the buffer, then set
 * its backing page dirty, then tag the page as dirty in its address_space's
 * radix tree and then attach the address_space's inode to its superblock's
 * dirty inode list.
 */
1148void mark_buffer_dirty(struct buffer_head *bh)
1149{
1150 WARN_ON_ONCE(!buffer_uptodate(bh));
1151
1152 trace_block_dirty_buffer(bh);
1153
1154
1155
1156
1157
1158
1159
1160 if (buffer_dirty(bh)) {
1161 smp_mb();
1162 if (buffer_dirty(bh))
1163 return;
1164 }
1165
1166 if (!test_set_buffer_dirty(bh)) {
1167 struct page *page = bh->b_page;
1168 struct address_space *mapping = NULL;
1169
1170 lock_page_memcg(page);
1171 if (!TestSetPageDirty(page)) {
1172 mapping = page_mapping(page);
1173 if (mapping)
1174 __set_page_dirty(page, mapping, 0);
1175 }
1176 unlock_page_memcg(page);
1177 if (mapping)
1178 __mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
1179 }
1180}
1181EXPORT_SYMBOL(mark_buffer_dirty);
1182
/*
 * Decrement a buffer_head's reference count.  If all buffers against a page
 * have zero reference count, are clean and unlocked, and if the page is
 * clean and unlocked then try_to_free_buffers() may strip the buffers from
 * the page in preparation for freeing it (sometimes, rarely, buffers are
 * removed from a page but it ends up not being freed, and buffers may later
 * be reattached).
 */
1190void __brelse(struct buffer_head * buf)
1191{
1192 if (atomic_read(&buf->b_count)) {
1193 put_bh(buf);
1194 return;
1195 }
1196 WARN(1, KERN_ERR "VFS: brelse: Trying to free free buffer\n");
1197}
1198EXPORT_SYMBOL(__brelse);
1199
/*
 * bforget() is like brelse(), except it discards any
 * potentially dirty data.
 */
1204void __bforget(struct buffer_head *bh)
1205{
1206 clear_buffer_dirty(bh);
1207 if (bh->b_assoc_map) {
1208 struct address_space *buffer_mapping = bh->b_page->mapping;
1209
1210 spin_lock(&buffer_mapping->private_lock);
1211 list_del_init(&bh->b_assoc_buffers);
1212 bh->b_assoc_map = NULL;
1213 spin_unlock(&buffer_mapping->private_lock);
1214 }
1215 __brelse(bh);
1216}
1217EXPORT_SYMBOL(__bforget);
1218
1219static struct buffer_head *__bread_slow(struct buffer_head *bh)
1220{
1221 lock_buffer(bh);
1222 if (buffer_uptodate(bh)) {
1223 unlock_buffer(bh);
1224 return bh;
1225 } else {
1226 get_bh(bh);
1227 bh->b_end_io = end_buffer_read_sync;
1228 submit_bh(REQ_OP_READ, 0, bh);
1229 wait_on_buffer(bh);
1230 if (buffer_uptodate(bh))
1231 return bh;
1232 }
1233 brelse(bh);
1234 return NULL;
1235}
1236
/*
 * Per-cpu buffer LRU implementation, used to reduce the cost of
 * __find_get_block().  The bhs[] array is sorted - the newest buffer is at
 * bhs[0].  Buffers have their refcount elevated by one while they sit in
 * an LRU.
 *
 * A lookup that hits the LRU avoids the slower pagecache walk in
 * __find_get_block_slow().  The LRUs only need protection against
 * invalidate_bh_lrus(), which is what bh_lru_lock()/bh_lru_unlock()
 * provide.
 */
1251#define BH_LRU_SIZE 16
1252
1253struct bh_lru {
1254 struct buffer_head *bhs[BH_LRU_SIZE];
1255};
1256
1257static DEFINE_PER_CPU(struct bh_lru, bh_lrus) = {{ NULL }};
1258
1259#ifdef CONFIG_SMP
1260#define bh_lru_lock() local_irq_disable()
1261#define bh_lru_unlock() local_irq_enable()
1262#else
1263#define bh_lru_lock() preempt_disable()
1264#define bh_lru_unlock() preempt_enable()
1265#endif
1266
1267static inline void check_irqs_on(void)
1268{
1269#ifdef irqs_disabled
1270 BUG_ON(irqs_disabled());
1271#endif
1272}
1273
1274
1275
1276
1277static void bh_lru_install(struct buffer_head *bh)
1278{
1279 struct buffer_head *evictee = NULL;
1280
1281 check_irqs_on();
1282 bh_lru_lock();
1283 if (__this_cpu_read(bh_lrus.bhs[0]) != bh) {
1284 struct buffer_head *bhs[BH_LRU_SIZE];
1285 int in;
1286 int out = 0;
1287
1288 get_bh(bh);
1289 bhs[out++] = bh;
1290 for (in = 0; in < BH_LRU_SIZE; in++) {
1291 struct buffer_head *bh2 =
1292 __this_cpu_read(bh_lrus.bhs[in]);
1293
1294 if (bh2 == bh) {
1295 __brelse(bh2);
1296 } else {
1297 if (out >= BH_LRU_SIZE) {
1298 BUG_ON(evictee != NULL);
1299 evictee = bh2;
1300 } else {
1301 bhs[out++] = bh2;
1302 }
1303 }
1304 }
1305 while (out < BH_LRU_SIZE)
1306 bhs[out++] = NULL;
1307 memcpy(this_cpu_ptr(&bh_lrus.bhs), bhs, sizeof(bhs));
1308 }
1309 bh_lru_unlock();
1310
1311 if (evictee)
1312 __brelse(evictee);
1313}
1314
1315
1316
1317
1318static struct buffer_head *
1319lookup_bh_lru(struct block_device *bdev, sector_t block, unsigned size)
1320{
1321 struct buffer_head *ret = NULL;
1322 unsigned int i;
1323
1324 check_irqs_on();
1325 bh_lru_lock();
1326 for (i = 0; i < BH_LRU_SIZE; i++) {
1327 struct buffer_head *bh = __this_cpu_read(bh_lrus.bhs[i]);
1328
1329 if (bh && bh->b_blocknr == block && bh->b_bdev == bdev &&
1330 bh->b_size == size) {
1331 if (i) {
1332 while (i) {
1333 __this_cpu_write(bh_lrus.bhs[i],
1334 __this_cpu_read(bh_lrus.bhs[i - 1]));
1335 i--;
1336 }
1337 __this_cpu_write(bh_lrus.bhs[0], bh);
1338 }
1339 get_bh(bh);
1340 ret = bh;
1341 break;
1342 }
1343 }
1344 bh_lru_unlock();
1345 return ret;
1346}
1347
/*
 * Perform a pagecache lookup for the matching buffer.  If it's there, refresh
 * it in the LRU and mark it as accessed.  If it is not present then return
 * NULL.
 */
1353struct buffer_head *
1354__find_get_block(struct block_device *bdev, sector_t block, unsigned size)
1355{
1356 struct buffer_head *bh = lookup_bh_lru(bdev, block, size);
1357
1358 if (bh == NULL) {
1359
1360 bh = __find_get_block_slow(bdev, block);
1361 if (bh)
1362 bh_lru_install(bh);
1363 } else
1364 touch_buffer(bh);
1365
1366 return bh;
1367}
1368EXPORT_SYMBOL(__find_get_block);
1369
/*
 * __getblk_gfp() will locate (and, if necessary, create) the buffer_head
 * which corresponds to the passed block_device, block and size.  The
 * returned buffer has its reference count incremented.
 *
 * __getblk_gfp() will lock up the machine if grow_dev_page's
 * try_to_free_buffers() attempt is failing.  FIXME, perhaps?
 */
1378struct buffer_head *
1379__getblk_gfp(struct block_device *bdev, sector_t block,
1380 unsigned size, gfp_t gfp)
1381{
1382 struct buffer_head *bh = __find_get_block(bdev, block, size);
1383
1384 might_sleep();
1385 if (bh == NULL)
1386 bh = __getblk_slow(bdev, block, size, gfp);
1387 return bh;
1388}
1389EXPORT_SYMBOL(__getblk_gfp);
1390
1391
1392
1393
1394void __breadahead(struct block_device *bdev, sector_t block, unsigned size)
1395{
1396 struct buffer_head *bh = __getblk(bdev, block, size);
1397 if (likely(bh)) {
1398 ll_rw_block(REQ_OP_READ, REQ_RAHEAD, 1, &bh);
1399 brelse(bh);
1400 }
1401}
1402EXPORT_SYMBOL(__breadahead);
1403
/**
 *  __bread_gfp() - reads a specified block and returns the bh
 *  @bdev: the block_device to read from
 *  @block: number of block
 *  @size: size (in bytes) to read
 *  @gfp: page allocation flag
 *
 *  Reads a specified block, and returns the buffer head that contains it.
 *  The page cache can be allocated from a non-movable area if @gfp is zero,
 *  so as not to prevent page migration.
 *  It returns NULL if the block was unreadable.
 */
1416struct buffer_head *
1417__bread_gfp(struct block_device *bdev, sector_t block,
1418 unsigned size, gfp_t gfp)
1419{
1420 struct buffer_head *bh = __getblk_gfp(bdev, block, size, gfp);
1421
1422 if (likely(bh) && !buffer_uptodate(bh))
1423 bh = __bread_slow(bh);
1424 return bh;
1425}
1426EXPORT_SYMBOL(__bread_gfp);
1427
/*
 * invalidate_bh_lrus() is called rarely - but not only at unmount.
 * This doesn't race because it runs in each cpu either in irq
 * or with preempt disabled.
 */
1433static void invalidate_bh_lru(void *arg)
1434{
1435 struct bh_lru *b = &get_cpu_var(bh_lrus);
1436 int i;
1437
1438 for (i = 0; i < BH_LRU_SIZE; i++) {
1439 brelse(b->bhs[i]);
1440 b->bhs[i] = NULL;
1441 }
1442 put_cpu_var(bh_lrus);
1443}
1444
1445static bool has_bh_in_lru(int cpu, void *dummy)
1446{
1447 struct bh_lru *b = per_cpu_ptr(&bh_lrus, cpu);
1448 int i;
1449
1450 for (i = 0; i < BH_LRU_SIZE; i++) {
1451 if (b->bhs[i])
1452 return 1;
1453 }
1454
1455 return 0;
1456}
1457
1458void invalidate_bh_lrus(void)
1459{
1460 on_each_cpu_cond(has_bh_in_lru, invalidate_bh_lru, NULL, 1, GFP_KERNEL);
1461}
1462EXPORT_SYMBOL_GPL(invalidate_bh_lrus);
1463
1464void set_bh_page(struct buffer_head *bh,
1465 struct page *page, unsigned long offset)
1466{
1467 bh->b_page = page;
1468 BUG_ON(offset >= PAGE_SIZE);
1469 if (PageHighMem(page))
1470
1471
1472
1473 bh->b_data = (char *)(0 + offset);
1474 else
1475 bh->b_data = page_address(page) + offset;
1476}
1477EXPORT_SYMBOL(set_bh_page);
1478
/* Bits that are cleared during an invalidate */
1484#define BUFFER_FLAGS_DISCARD \
1485 (1 << BH_Mapped | 1 << BH_New | 1 << BH_Req | \
1486 1 << BH_Delay | 1 << BH_Unwritten)
1487
1488static void discard_buffer(struct buffer_head * bh)
1489{
1490 unsigned long b_state, b_state_old;
1491
1492 lock_buffer(bh);
1493 clear_buffer_dirty(bh);
1494 bh->b_bdev = NULL;
1495 b_state = bh->b_state;
1496 for (;;) {
1497 b_state_old = cmpxchg(&bh->b_state, b_state,
1498 (b_state & ~BUFFER_FLAGS_DISCARD));
1499 if (b_state_old == b_state)
1500 break;
1501 b_state = b_state_old;
1502 }
1503 unlock_buffer(bh);
1504}
1505
/**
 * block_invalidatepage - invalidate part or all of a buffer-backed page
 * @page: the page which is affected
 * @offset: start of the range to invalidate
 * @length: length of the range to invalidate
 *
 * block_invalidatepage() is called when all or part of the page has become
 * invalidated by a truncate operation.
 *
 * block_invalidatepage() does not have to release all buffers, but it must
 * ensure that no dirty buffer is left outside @offset and that no I/O
 * is underway against any of the blocks which are outside the truncation
 * point, because the caller is about to free (and possibly reuse) those
 * blocks on-disk.
 */
1522void block_invalidatepage(struct page *page, unsigned int offset,
1523 unsigned int length)
1524{
1525 struct buffer_head *head, *bh, *next;
1526 unsigned int curr_off = 0;
1527 unsigned int stop = length + offset;
1528
1529 BUG_ON(!PageLocked(page));
1530 if (!page_has_buffers(page))
1531 goto out;
1532
1533
1534
1535
1536 BUG_ON(stop > PAGE_SIZE || stop < length);
1537
1538 head = page_buffers(page);
1539 bh = head;
1540 do {
1541 unsigned int next_off = curr_off + bh->b_size;
1542 next = bh->b_this_page;
1543
1544
1545
1546
1547 if (next_off > stop)
1548 goto out;
1549
1550
1551
1552
1553 if (offset <= curr_off)
1554 discard_buffer(bh);
1555 curr_off = next_off;
1556 bh = next;
1557 } while (bh != head);
1558
1559
1560
1561
1562
1563
1564 if (offset == 0)
1565 try_to_release_page(page, 0);
1566out:
1567 return;
1568}
1569EXPORT_SYMBOL(block_invalidatepage);
1570
/*
 * We attach and possibly dirty the buffers atomically wrt
 * __set_page_dirty_buffers() via private_lock.  try_to_free_buffers
 * is already excluded via the page lock.
 */
1577void create_empty_buffers(struct page *page,
1578 unsigned long blocksize, unsigned long b_state)
1579{
1580 struct buffer_head *bh, *head, *tail;
1581
1582 head = alloc_page_buffers(page, blocksize, 1);
1583 bh = head;
1584 do {
1585 bh->b_state |= b_state;
1586 tail = bh;
1587 bh = bh->b_this_page;
1588 } while (bh);
1589 tail->b_this_page = head;
1590
1591 spin_lock(&page->mapping->private_lock);
1592 if (PageUptodate(page) || PageDirty(page)) {
1593 bh = head;
1594 do {
1595 if (PageDirty(page))
1596 set_buffer_dirty(bh);
1597 if (PageUptodate(page))
1598 set_buffer_uptodate(bh);
1599 bh = bh->b_this_page;
1600 } while (bh != head);
1601 }
1602 attach_page_buffers(page, head);
1603 spin_unlock(&page->mapping->private_lock);
1604}
1605EXPORT_SYMBOL(create_empty_buffers);
1606
/*
 * Discard a stale buffer-cache alias of a newly allocated block.
 *
 * When a filesystem allocates a block for data, an old buffer_head for the
 * same disk block may still be hanging around in the blockdev's pagecache
 * (for example from earlier metadata use of that block).  If that alias were
 * left dirty it could later be written out and corrupt the new data, so we
 * clear its dirty bit, wait for any writeout already in flight, and clear
 * BH_Req.  The alias is deliberately not unmapped, so anyone who picks it
 * up with bread() afterwards still sees a valid mapping.
 */
1623void unmap_underlying_metadata(struct block_device *bdev, sector_t block)
1624{
1625 struct buffer_head *old_bh;
1626
1627 might_sleep();
1628
1629 old_bh = __find_get_block_slow(bdev, block);
1630 if (old_bh) {
1631 clear_buffer_dirty(old_bh);
1632 wait_on_buffer(old_bh);
1633 clear_buffer_req(old_bh);
1634 __brelse(old_bh);
1635 }
1636}
1637EXPORT_SYMBOL(unmap_underlying_metadata);
1638
1639
1640
1641
1642
1643
1644
1645
1646
1647static inline int block_size_bits(unsigned int blocksize)
1648{
1649 return ilog2(blocksize);
1650}
1651
1652static struct buffer_head *create_page_buffers(struct page *page, struct inode *inode, unsigned int b_state)
1653{
1654 BUG_ON(!PageLocked(page));
1655
1656 if (!page_has_buffers(page))
1657 create_empty_buffers(page, 1 << ACCESS_ONCE(inode->i_blkbits), b_state);
1658 return page_buffers(page);
1659}
1660
/*
 * NOTE! All mapped/uptodate combinations are valid:
 *
 *	Mapped	Uptodate	Meaning
 *
 *	No	No		"unknown" - must do get_block()
 *	No	Yes		"hole" - zero-filled
 *	Yes	No		"allocated" - allocated on disk, not read in
 *	Yes	Yes		"valid" - allocated and up-to-date in memory.
 *
 * "Dirty" is valid only with the last case (mapped+uptodate).
 */

/*
 * __block_write_full_page() starts writeback against all the dirty, mapped
 * buffers on the page.  Under WB_SYNC_NONE, buffers already locked by
 * somebody else just cause the page to be redirtied; under WB_SYNC_ALL we
 * wait for the lock.  The page is unlocked once the chosen buffers have
 * been submitted, and PageWriteback is cleared when the last of them
 * completes (see end_buffer_async_write()).
 */
1690int __block_write_full_page(struct inode *inode, struct page *page,
1691 get_block_t *get_block, struct writeback_control *wbc,
1692 bh_end_io_t *handler)
1693{
1694 int err;
1695 sector_t block;
1696 sector_t last_block;
1697 struct buffer_head *bh, *head;
1698 unsigned int blocksize, bbits;
1699 int nr_underway = 0;
1700 int write_flags = (wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : 0);
1701
1702 head = create_page_buffers(page, inode,
1703 (1 << BH_Dirty)|(1 << BH_Uptodate));
1704
1705
1706
1707
1708
1709
1710
1711
1712
1713
1714
1715 bh = head;
1716 blocksize = bh->b_size;
1717 bbits = block_size_bits(blocksize);
1718
1719 block = (sector_t)page->index << (PAGE_SHIFT - bbits);
1720 last_block = (i_size_read(inode) - 1) >> bbits;
1721
1722
1723
1724
1725
1726 do {
1727 if (block > last_block) {
1728
1729
1730
1731
1732
1733
1734
1735
1736 clear_buffer_dirty(bh);
1737 set_buffer_uptodate(bh);
1738 } else if ((!buffer_mapped(bh) || buffer_delay(bh)) &&
1739 buffer_dirty(bh)) {
1740 WARN_ON(bh->b_size != blocksize);
1741 err = get_block(inode, block, bh, 1);
1742 if (err)
1743 goto recover;
1744 clear_buffer_delay(bh);
1745 if (buffer_new(bh)) {
1746
1747 clear_buffer_new(bh);
1748 unmap_underlying_metadata(bh->b_bdev,
1749 bh->b_blocknr);
1750 }
1751 }
1752 bh = bh->b_this_page;
1753 block++;
1754 } while (bh != head);
1755
1756 do {
1757 if (!buffer_mapped(bh))
1758 continue;
1759
1760
1761
1762
1763
1764
1765
1766 if (wbc->sync_mode != WB_SYNC_NONE) {
1767 lock_buffer(bh);
1768 } else if (!trylock_buffer(bh)) {
1769 redirty_page_for_writepage(wbc, page);
1770 continue;
1771 }
1772 if (test_clear_buffer_dirty(bh)) {
1773 mark_buffer_async_write_endio(bh, handler);
1774 } else {
1775 unlock_buffer(bh);
1776 }
1777 } while ((bh = bh->b_this_page) != head);
1778
1779
1780
1781
1782
1783 BUG_ON(PageWriteback(page));
1784 set_page_writeback(page);
1785
1786 do {
1787 struct buffer_head *next = bh->b_this_page;
1788 if (buffer_async_write(bh)) {
1789 submit_bh_wbc(REQ_OP_WRITE, write_flags, bh, 0, wbc);
1790 nr_underway++;
1791 }
1792 bh = next;
1793 } while (bh != head);
1794 unlock_page(page);
1795
1796 err = 0;
1797done:
1798 if (nr_underway == 0) {
1799
1800
1801
1802
1803
1804 end_page_writeback(page);
1805
1806
1807
1808
1809
1810 }
1811 return err;
1812
1813recover:
1814
1815
1816
1817
1818
1819
1820 bh = head;
1821
1822 do {
1823 if (buffer_mapped(bh) && buffer_dirty(bh) &&
1824 !buffer_delay(bh)) {
1825 lock_buffer(bh);
1826 mark_buffer_async_write_endio(bh, handler);
1827 } else {
1828
1829
1830
1831
1832 clear_buffer_dirty(bh);
1833 }
1834 } while ((bh = bh->b_this_page) != head);
1835 SetPageError(page);
1836 BUG_ON(PageWriteback(page));
1837 mapping_set_error(page->mapping, err);
1838 set_page_writeback(page);
1839 do {
1840 struct buffer_head *next = bh->b_this_page;
1841 if (buffer_async_write(bh)) {
1842 clear_buffer_dirty(bh);
1843 submit_bh_wbc(REQ_OP_WRITE, write_flags, bh, 0, wbc);
1844 nr_underway++;
1845 }
1846 bh = next;
1847 } while (bh != head);
1848 unlock_page(page);
1849 goto done;
1850}
1851EXPORT_SYMBOL(__block_write_full_page);
1852
/*
 * If a page has any new buffers, zero them out here, and mark them uptodate
 * and dirty so they'll be written out (in order to prevent uninitialised
 * block data from leaking).  And clear the new bit.
 */
1858void page_zero_new_buffers(struct page *page, unsigned from, unsigned to)
1859{
1860 unsigned int block_start, block_end;
1861 struct buffer_head *head, *bh;
1862
1863 BUG_ON(!PageLocked(page));
1864 if (!page_has_buffers(page))
1865 return;
1866
1867 bh = head = page_buffers(page);
1868 block_start = 0;
1869 do {
1870 block_end = block_start + bh->b_size;
1871
1872 if (buffer_new(bh)) {
1873 if (block_end > from && block_start < to) {
1874 if (!PageUptodate(page)) {
1875 unsigned start, size;
1876
1877 start = max(from, block_start);
1878 size = min(to, block_end) - start;
1879
1880 zero_user(page, start, size);
1881 set_buffer_uptodate(bh);
1882 }
1883
1884 clear_buffer_new(bh);
1885 mark_buffer_dirty(bh);
1886 }
1887 }
1888
1889 block_start = block_end;
1890 bh = bh->b_this_page;
1891 } while (bh != head);
1892}
1893EXPORT_SYMBOL(page_zero_new_buffers);
1894
1895static void
1896iomap_to_bh(struct inode *inode, sector_t block, struct buffer_head *bh,
1897 struct iomap *iomap)
1898{
1899 loff_t offset = block << inode->i_blkbits;
1900
1901 bh->b_bdev = iomap->bdev;
1902
1903
1904
1905
1906
1907
1908
1909 BUG_ON(offset >= iomap->offset + iomap->length);
1910
1911 switch (iomap->type) {
1912 case IOMAP_HOLE:
1913
1914
1915
1916
1917
1918 if (!buffer_uptodate(bh) ||
1919 (offset >= i_size_read(inode)))
1920 set_buffer_new(bh);
1921 break;
1922 case IOMAP_DELALLOC:
1923 if (!buffer_uptodate(bh) ||
1924 (offset >= i_size_read(inode)))
1925 set_buffer_new(bh);
1926 set_buffer_uptodate(bh);
1927 set_buffer_mapped(bh);
1928 set_buffer_delay(bh);
1929 break;
1930 case IOMAP_UNWRITTEN:
 /*
  * For unwritten regions we always need to ensure that regions
  * in the block we are not writing to are zeroed.  Mark the
  * buffer as new to ensure this.
  */
1936 set_buffer_new(bh);
1937 set_buffer_unwritten(bh);
 /* FALLTHRU */
1939 case IOMAP_MAPPED:
1940 if (offset >= i_size_read(inode))
1941 set_buffer_new(bh);
1942 bh->b_blocknr = (iomap->blkno >> (inode->i_blkbits - 9)) +
1943 ((offset - iomap->offset) >> inode->i_blkbits);
1944 set_buffer_mapped(bh);
1945 break;
1946 }
1947}
1948
1949int __block_write_begin_int(struct page *page, loff_t pos, unsigned len,
1950 get_block_t *get_block, struct iomap *iomap)
1951{
1952 unsigned from = pos & (PAGE_SIZE - 1);
1953 unsigned to = from + len;
1954 struct inode *inode = page->mapping->host;
1955 unsigned block_start, block_end;
1956 sector_t block;
1957 int err = 0;
1958 unsigned blocksize, bbits;
1959 struct buffer_head *bh, *head, *wait[2], **wait_bh=wait;
1960
1961 BUG_ON(!PageLocked(page));
1962 BUG_ON(from > PAGE_SIZE);
1963 BUG_ON(to > PAGE_SIZE);
1964 BUG_ON(from > to);
1965
1966 head = create_page_buffers(page, inode, 0);
1967 blocksize = head->b_size;
1968 bbits = block_size_bits(blocksize);
1969
1970 block = (sector_t)page->index << (PAGE_SHIFT - bbits);
1971
1972 for(bh = head, block_start = 0; bh != head || !block_start;
1973 block++, block_start=block_end, bh = bh->b_this_page) {
1974 block_end = block_start + blocksize;
1975 if (block_end <= from || block_start >= to) {
1976 if (PageUptodate(page)) {
1977 if (!buffer_uptodate(bh))
1978 set_buffer_uptodate(bh);
1979 }
1980 continue;
1981 }
1982 if (buffer_new(bh))
1983 clear_buffer_new(bh);
1984 if (!buffer_mapped(bh)) {
1985 WARN_ON(bh->b_size != blocksize);
1986 if (get_block) {
1987 err = get_block(inode, block, bh, 1);
1988 if (err)
1989 break;
1990 } else {
1991 iomap_to_bh(inode, block, bh, iomap);
1992 }
1993
1994 if (buffer_new(bh)) {
1995 unmap_underlying_metadata(bh->b_bdev,
1996 bh->b_blocknr);
1997 if (PageUptodate(page)) {
1998 clear_buffer_new(bh);
1999 set_buffer_uptodate(bh);
2000 mark_buffer_dirty(bh);
2001 continue;
2002 }
2003 if (block_end > to || block_start < from)
2004 zero_user_segments(page,
2005 to, block_end,
2006 block_start, from);
2007 continue;
2008 }
2009 }
2010 if (PageUptodate(page)) {
2011 if (!buffer_uptodate(bh))
2012 set_buffer_uptodate(bh);
2013 continue;
2014 }
2015 if (!buffer_uptodate(bh) && !buffer_delay(bh) &&
2016 !buffer_unwritten(bh) &&
2017 (block_start < from || block_end > to)) {
2018 ll_rw_block(REQ_OP_READ, 0, 1, &bh);
2019 *wait_bh++=bh;
2020 }
2021 }
2022
2023
2024
2025 while(wait_bh > wait) {
2026 wait_on_buffer(*--wait_bh);
2027 if (!buffer_uptodate(*wait_bh))
2028 err = -EIO;
2029 }
2030 if (unlikely(err))
2031 page_zero_new_buffers(page, from, to);
2032 return err;
2033}
2034
2035int __block_write_begin(struct page *page, loff_t pos, unsigned len,
2036 get_block_t *get_block)
2037{
2038 return __block_write_begin_int(page, pos, len, get_block, NULL);
2039}
2040EXPORT_SYMBOL(__block_write_begin);
2041
2042static int __block_commit_write(struct inode *inode, struct page *page,
2043 unsigned from, unsigned to)
2044{
2045 unsigned block_start, block_end;
2046 int partial = 0;
2047 unsigned blocksize;
2048 struct buffer_head *bh, *head;
2049
2050 bh = head = page_buffers(page);
2051 blocksize = bh->b_size;
2052
2053 block_start = 0;
2054 do {
2055 block_end = block_start + blocksize;
2056 if (block_end <= from || block_start >= to) {
2057 if (!buffer_uptodate(bh))
2058 partial = 1;
2059 } else {
2060 set_buffer_uptodate(bh);
2061 mark_buffer_dirty(bh);
2062 }
2063 clear_buffer_new(bh);
2064
2065 block_start = block_end;
2066 bh = bh->b_this_page;
2067 } while (bh != head);
2068
2069
2070
2071
2072
2073
2074
2075 if (!partial)
2076 SetPageUptodate(page);
2077 return 0;
2078}
2079
/*
 * block_write_begin takes care of the basic task of block allocation and
 * bringing partial write blocks uptodate first.
 *
 * The filesystem needs to handle block truncation upon failure.
 */
2086int block_write_begin(struct address_space *mapping, loff_t pos, unsigned len,
2087 unsigned flags, struct page **pagep, get_block_t *get_block)
2088{
2089 pgoff_t index = pos >> PAGE_SHIFT;
2090 struct page *page;
2091 int status;
2092
2093 page = grab_cache_page_write_begin(mapping, index, flags);
2094 if (!page)
2095 return -ENOMEM;
2096
2097 status = __block_write_begin(page, pos, len, get_block);
2098 if (unlikely(status)) {
2099 unlock_page(page);
2100 put_page(page);
2101 page = NULL;
2102 }
2103
2104 *pagep = page;
2105 return status;
2106}
2107EXPORT_SYMBOL(block_write_begin);
2108
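/*
 * Generic ->write_end helper: if the copy from userspace was short and the
 * page is not uptodate, treat it as a zero-length copy, and zero any "new"
 * buffers over the unwritten tail so uninitialised block data is not
 * exposed.  The written range is then marked uptodate and dirty via
 * __block_commit_write(), and the number of bytes accounted is returned.
 */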
2109int block_write_end(struct file *file, struct address_space *mapping,
2110 loff_t pos, unsigned len, unsigned copied,
2111 struct page *page, void *fsdata)
2112{
2113 struct inode *inode = mapping->host;
2114 unsigned start;
2115
2116 start = pos & (PAGE_SIZE - 1);
2117
2118 if (unlikely(copied < len)) {
2119
2120
2121
2122
2123
2124
2125
2126
2127
2128
2129
2130
2131 if (!PageUptodate(page))
2132 copied = 0;
2133
2134 page_zero_new_buffers(page, start+copied, start+len);
2135 }
2136 flush_dcache_page(page);
2137
2138
2139 __block_commit_write(inode, page, start, start+copied);
2140
2141 return copied;
2142}
2143EXPORT_SYMBOL(block_write_end);
2144
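/*
 * Complete a write: update the page via block_write_end(), extend i_size if
 * the write went past the old end of file (while the page is still locked),
 * drop the page, and only then mark the inode dirty.
 */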
2145int generic_write_end(struct file *file, struct address_space *mapping,
2146 loff_t pos, unsigned len, unsigned copied,
2147 struct page *page, void *fsdata)
2148{
2149 struct inode *inode = mapping->host;
2150 loff_t old_size = inode->i_size;
2151 int i_size_changed = 0;
2152
2153 copied = block_write_end(file, mapping, pos, len, copied, page, fsdata);
2154
2155
2156
2157
2158
2159
2160
2161
2162 if (pos+copied > inode->i_size) {
2163 i_size_write(inode, pos+copied);
2164 i_size_changed = 1;
2165 }
2166
2167 unlock_page(page);
2168 put_page(page);
2169
2170 if (old_size < pos)
2171 pagecache_isize_extended(inode, old_size, pos);
2172
2173
2174
2175
2176
2177
2178 if (i_size_changed)
2179 mark_inode_dirty(inode);
2180
2181 return copied;
2182}
2183EXPORT_SYMBOL(generic_write_end);
2184
/*
 * block_is_partially_uptodate checks whether buffers within a page are
 * uptodate or not.
 *
 * Returns true if all buffers which correspond to the file portion
 * we want to read are uptodate.
 */
2192int block_is_partially_uptodate(struct page *page, unsigned long from,
2193 unsigned long count)
2194{
2195 unsigned block_start, block_end, blocksize;
2196 unsigned to;
2197 struct buffer_head *bh, *head;
2198 int ret = 1;
2199
2200 if (!page_has_buffers(page))
2201 return 0;
2202
2203 head = page_buffers(page);
2204 blocksize = head->b_size;
2205 to = min_t(unsigned, PAGE_SIZE - from, count);
2206 to = from + to;
2207 if (from < blocksize && to > PAGE_SIZE - blocksize)
2208 return 0;
2209
2210 bh = head;
2211 block_start = 0;
2212 do {
2213 block_end = block_start + blocksize;
2214 if (block_end > from && block_start < to) {
2215 if (!buffer_uptodate(bh)) {
2216 ret = 0;
2217 break;
2218 }
2219 if (block_end >= to)
2220 break;
2221 }
2222 block_start = block_end;
2223 bh = bh->b_this_page;
2224 } while (bh != head);
2225
2226 return ret;
2227}
2228EXPORT_SYMBOL(block_is_partially_uptodate);
2229
/*
 * Generic "read page" function for block devices that have the normal
 * get_block functionality.  This is most of the block device filesystems.
 * Reads the page asynchronously --- the unlock_buffer() and
 * set/clear_buffer_uptodate() functions propagate buffer state into the
 * page struct once IO has completed.
 */
2237int block_read_full_page(struct page *page, get_block_t *get_block)
2238{
2239 struct inode *inode = page->mapping->host;
2240 sector_t iblock, lblock;
2241 struct buffer_head *bh, *head, *arr[MAX_BUF_PER_PAGE];
2242 unsigned int blocksize, bbits;
2243 int nr, i;
2244 int fully_mapped = 1;
2245
2246 head = create_page_buffers(page, inode, 0);
2247 blocksize = head->b_size;
2248 bbits = block_size_bits(blocksize);
2249
2250 iblock = (sector_t)page->index << (PAGE_SHIFT - bbits);
2251 lblock = (i_size_read(inode)+blocksize-1) >> bbits;
2252 bh = head;
2253 nr = 0;
2254 i = 0;
2255
2256 do {
2257 if (buffer_uptodate(bh))
2258 continue;
2259
2260 if (!buffer_mapped(bh)) {
2261 int err = 0;
2262
2263 fully_mapped = 0;
2264 if (iblock < lblock) {
2265 WARN_ON(bh->b_size != blocksize);
2266 err = get_block(inode, iblock, bh, 0);
2267 if (err)
2268 SetPageError(page);
2269 }
2270 if (!buffer_mapped(bh)) {
2271 zero_user(page, i * blocksize, blocksize);
2272 if (!err)
2273 set_buffer_uptodate(bh);
2274 continue;
2275 }
2276
2277
2278
2279
2280 if (buffer_uptodate(bh))
2281 continue;
2282 }
2283 arr[nr++] = bh;
2284 } while (i++, iblock++, (bh = bh->b_this_page) != head);
2285
2286 if (fully_mapped)
2287 SetPageMappedToDisk(page);
2288
2289 if (!nr) {
2290
2291
2292
2293
2294 if (!PageError(page))
2295 SetPageUptodate(page);
2296 unlock_page(page);
2297 return 0;
2298 }
2299
2300
2301 for (i = 0; i < nr; i++) {
2302 bh = arr[i];
2303 lock_buffer(bh);
2304 mark_buffer_async_read(bh);
2305 }
2306
2307
2308
2309
2310
2311
2312 for (i = 0; i < nr; i++) {
2313 bh = arr[i];
2314 if (buffer_uptodate(bh))
2315 end_buffer_async_read(bh, 1);
2316 else
2317 submit_bh(REQ_OP_READ, 0, bh);
2318 }
2319 return 0;
2320}
2321EXPORT_SYMBOL(block_read_full_page);
2322
/*
 * Utility function for filesystems that need to do work on expanding
 * truncates.  Uses filesystem pagecache writes to allow the filesystem to
 * deal with the hole.
 */
2327int generic_cont_expand_simple(struct inode *inode, loff_t size)
2328{
2329 struct address_space *mapping = inode->i_mapping;
2330 struct page *page;
2331 void *fsdata;
2332 int err;
2333
2334 err = inode_newsize_ok(inode, size);
2335 if (err)
2336 goto out;
2337
2338 err = pagecache_write_begin(NULL, mapping, size, 0,
2339 AOP_FLAG_UNINTERRUPTIBLE|AOP_FLAG_CONT_EXPAND,
2340 &page, &fsdata);
2341 if (err)
2342 goto out;
2343
2344 err = pagecache_write_end(NULL, mapping, size, 0, 0, page, fsdata);
2345 BUG_ON(err > 0);
2346
2347out:
2348 return err;
2349}
2350EXPORT_SYMBOL(generic_cont_expand_simple);
2351
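/*
 * Zero out the byte range between the current end of data (*bytes) and the
 * new write position, page by page, so that a write beyond EOF on a
 * filesystem that cannot represent holes sees zero-filled blocks.  Updates
 * *bytes as it goes and honours fatal signals.
 */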
2352static int cont_expand_zero(struct file *file, struct address_space *mapping,
2353 loff_t pos, loff_t *bytes)
2354{
2355 struct inode *inode = mapping->host;
2356 unsigned blocksize = 1 << inode->i_blkbits;
2357 struct page *page;
2358 void *fsdata;
2359 pgoff_t index, curidx;
2360 loff_t curpos;
2361 unsigned zerofrom, offset, len;
2362 int err = 0;
2363
2364 index = pos >> PAGE_SHIFT;
2365 offset = pos & ~PAGE_MASK;
2366
2367 while (index > (curidx = (curpos = *bytes)>>PAGE_SHIFT)) {
2368 zerofrom = curpos & ~PAGE_MASK;
2369 if (zerofrom & (blocksize-1)) {
2370 *bytes |= (blocksize-1);
2371 (*bytes)++;
2372 }
2373 len = PAGE_SIZE - zerofrom;
2374
2375 err = pagecache_write_begin(file, mapping, curpos, len,
2376 AOP_FLAG_UNINTERRUPTIBLE,
2377 &page, &fsdata);
2378 if (err)
2379 goto out;
2380 zero_user(page, zerofrom, len);
2381 err = pagecache_write_end(file, mapping, curpos, len, len,
2382 page, fsdata);
2383 if (err < 0)
2384 goto out;
2385 BUG_ON(err != len);
2386 err = 0;
2387
2388 balance_dirty_pages_ratelimited(mapping);
2389
2390 if (unlikely(fatal_signal_pending(current))) {
2391 err = -EINTR;
2392 goto out;
2393 }
2394 }
2395
2396
2397 if (index == curidx) {
2398 zerofrom = curpos & ~PAGE_MASK;
2399
2400 if (offset <= zerofrom) {
2401 goto out;
2402 }
2403 if (zerofrom & (blocksize-1)) {
2404 *bytes |= (blocksize-1);
2405 (*bytes)++;
2406 }
2407 len = offset - zerofrom;
2408
2409 err = pagecache_write_begin(file, mapping, curpos, len,
2410 AOP_FLAG_UNINTERRUPTIBLE,
2411 &page, &fsdata);
2412 if (err)
2413 goto out;
2414 zero_user(page, zerofrom, len);
2415 err = pagecache_write_end(file, mapping, curpos, len, len,
2416 page, fsdata);
2417 if (err < 0)
2418 goto out;
2419 BUG_ON(err != len);
2420 err = 0;
2421 }
2422out:
2423 return err;
2424}
2425
/*
 * For filesystems that do not allow holes in a file: we may have to
 * zero-extend the file up to the write position before the write begins.
 */
2430int cont_write_begin(struct file *file, struct address_space *mapping,
2431 loff_t pos, unsigned len, unsigned flags,
2432 struct page **pagep, void **fsdata,
2433 get_block_t *get_block, loff_t *bytes)
2434{
2435 struct inode *inode = mapping->host;
2436 unsigned blocksize = 1 << inode->i_blkbits;
2437 unsigned zerofrom;
2438 int err;
2439
2440 err = cont_expand_zero(file, mapping, pos, bytes);
2441 if (err)
2442 return err;
2443
2444 zerofrom = *bytes & ~PAGE_MASK;
2445 if (pos+len > *bytes && zerofrom & (blocksize-1)) {
2446 *bytes |= (blocksize-1);
2447 (*bytes)++;
2448 }
2449
2450 return block_write_begin(mapping, pos, len, flags, pagep, get_block);
2451}
2452EXPORT_SYMBOL(cont_write_begin);
2453
2454int block_commit_write(struct page *page, unsigned from, unsigned to)
2455{
2456 struct inode *inode = page->mapping->host;
2457 __block_commit_write(inode,page,from,to);
2458 return 0;
2459}
2460EXPORT_SYMBOL(block_commit_write);
2461
/*
 * block_page_mkwrite() is used by filesystems to make a page writable in
 * response to a write fault on a shared mapping.  It must not change the
 * file size, so the EOF checks here are careful: a page that now lies
 * entirely beyond i_size (or has been truncated away) fails with -EFAULT,
 * and for a page straddling i_size only the part inside i_size is prepared
 * and dirtied.
 *
 * Direct callers of this function should protect against filesystem
 * freezing using sb_start_pagefault() - sb_end_pagefault().
 */
2480int block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
2481 get_block_t get_block)
2482{
2483 struct page *page = vmf->page;
2484 struct inode *inode = file_inode(vma->vm_file);
2485 unsigned long end;
2486 loff_t size;
2487 int ret;
2488
2489 lock_page(page);
2490 size = i_size_read(inode);
2491 if ((page->mapping != inode->i_mapping) ||
2492 (page_offset(page) > size)) {
2493
2494 ret = -EFAULT;
2495 goto out_unlock;
2496 }
2497
2498
2499 if (((page->index + 1) << PAGE_SHIFT) > size)
2500 end = size & ~PAGE_MASK;
2501 else
2502 end = PAGE_SIZE;
2503
2504 ret = __block_write_begin(page, 0, end, get_block);
2505 if (!ret)
2506 ret = block_commit_write(page, 0, end);
2507
2508 if (unlikely(ret < 0))
2509 goto out_unlock;
2510 set_page_dirty(page);
2511 wait_for_stable_page(page);
2512 return 0;
2513out_unlock:
2514 unlock_page(page);
2515 return ret;
2516}
2517EXPORT_SYMBOL(block_page_mkwrite);
2518
2519
2520
2521
2522
2523
2524static void end_buffer_read_nobh(struct buffer_head *bh, int uptodate)
2525{
2526 __end_buffer_read_notouch(bh, uptodate);
2527}
2528
/*
 * Attach the singly-linked list of buffers created by nobh_write_begin to
 * the page (converting it to a circular list and taking care of page
 * dirty races).
 */
2534static void attach_nobh_buffers(struct page *page, struct buffer_head *head)
2535{
2536 struct buffer_head *bh;
2537
2538 BUG_ON(!PageLocked(page));
2539
2540 spin_lock(&page->mapping->private_lock);
2541 bh = head;
2542 do {
2543 if (PageDirty(page))
2544 set_buffer_dirty(bh);
2545 if (!bh->b_this_page)
2546 bh->b_this_page = head;
2547 bh = bh->b_this_page;
2548 } while (bh != head);
2549 attach_page_buffers(page, head);
2550 spin_unlock(&page->mapping->private_lock);
2551}
2552
/*
 * On entry, the page is fully not uptodate.
 * On exit the page is fully uptodate in the areas outside (from,to).
 * The filesystem needs to handle block truncation upon failure.
 */
2558int nobh_write_begin(struct address_space *mapping,
2559 loff_t pos, unsigned len, unsigned flags,
2560 struct page **pagep, void **fsdata,
2561 get_block_t *get_block)
2562{
2563 struct inode *inode = mapping->host;
2564 const unsigned blkbits = inode->i_blkbits;
2565 const unsigned blocksize = 1 << blkbits;
2566 struct buffer_head *head, *bh;
2567 struct page *page;
2568 pgoff_t index;
2569 unsigned from, to;
2570 unsigned block_in_page;
2571 unsigned block_start, block_end;
2572 sector_t block_in_file;
2573 int nr_reads = 0;
2574 int ret = 0;
2575 int is_mapped_to_disk = 1;
2576
2577 index = pos >> PAGE_SHIFT;
2578 from = pos & (PAGE_SIZE - 1);
2579 to = from + len;
2580
2581 page = grab_cache_page_write_begin(mapping, index, flags);
2582 if (!page)
2583 return -ENOMEM;
2584 *pagep = page;
2585 *fsdata = NULL;
2586
2587 if (page_has_buffers(page)) {
2588 ret = __block_write_begin(page, pos, len, get_block);
2589 if (unlikely(ret))
2590 goto out_release;
2591 return ret;
2592 }
2593
2594 if (PageMappedToDisk(page))
2595 return 0;
2596
2597
2598
2599
2600
2601
2602
2603
2604
2605
2606 head = alloc_page_buffers(page, blocksize, 0);
2607 if (!head) {
2608 ret = -ENOMEM;
2609 goto out_release;
2610 }
2611
2612 block_in_file = (sector_t)page->index << (PAGE_SHIFT - blkbits);
2613
2614
2615
2616
2617
2618
2619 for (block_start = 0, block_in_page = 0, bh = head;
2620 block_start < PAGE_SIZE;
2621 block_in_page++, block_start += blocksize, bh = bh->b_this_page) {
2622 int create;
2623
2624 block_end = block_start + blocksize;
2625 bh->b_state = 0;
2626 create = 1;
2627 if (block_start >= to)
2628 create = 0;
2629 ret = get_block(inode, block_in_file + block_in_page,
2630 bh, create);
2631 if (ret)
2632 goto failed;
2633 if (!buffer_mapped(bh))
2634 is_mapped_to_disk = 0;
2635 if (buffer_new(bh))
2636 unmap_underlying_metadata(bh->b_bdev, bh->b_blocknr);
2637 if (PageUptodate(page)) {
2638 set_buffer_uptodate(bh);
2639 continue;
2640 }
2641 if (buffer_new(bh) || !buffer_mapped(bh)) {
2642 zero_user_segments(page, block_start, from,
2643 to, block_end);
2644 continue;
2645 }
2646 if (buffer_uptodate(bh))
2647 continue;
2648 if (block_start < from || block_end > to) {
2649 lock_buffer(bh);
2650 bh->b_end_io = end_buffer_read_nobh;
2651 submit_bh(REQ_OP_READ, 0, bh);
2652 nr_reads++;
2653 }
2654 }
2655
2656 if (nr_reads) {
2657
2658
2659
2660
2661
2662 for (bh = head; bh; bh = bh->b_this_page) {
2663 wait_on_buffer(bh);
2664 if (!buffer_uptodate(bh))
2665 ret = -EIO;
2666 }
2667 if (ret)
2668 goto failed;
2669 }
2670
2671 if (is_mapped_to_disk)
2672 SetPageMappedToDisk(page);
2673
2674 *fsdata = head;
2675
2676 return 0;
2677
2678failed:
2679 BUG_ON(!ret);
2680
2681
2682
2683
2684
2685
2686
2687 attach_nobh_buffers(page, head);
2688 page_zero_new_buffers(page, from, to);
2689
2690out_release:
2691 unlock_page(page);
2692 put_page(page);
2693 *pagep = NULL;
2694
2695 return ret;
2696}
2697EXPORT_SYMBOL(nobh_write_begin);
2698
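/*
 * ->write_end companion of nobh_write_begin().  If the copy was short we
 * fall back to attaching the buffers and using generic_write_end();
 * otherwise the whole page is now uptodate and dirty, i_size is extended if
 * needed, and the temporary buffer_heads passed back via fsdata are freed.
 */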
2699int nobh_write_end(struct file *file, struct address_space *mapping,
2700 loff_t pos, unsigned len, unsigned copied,
2701 struct page *page, void *fsdata)
2702{
2703 struct inode *inode = page->mapping->host;
2704 struct buffer_head *head = fsdata;
2705 struct buffer_head *bh;
2706 BUG_ON(fsdata != NULL && page_has_buffers(page));
2707
2708 if (unlikely(copied < len) && head)
2709 attach_nobh_buffers(page, head);
2710 if (page_has_buffers(page))
2711 return generic_write_end(file, mapping, pos, len,
2712 copied, page, fsdata);
2713
2714 SetPageUptodate(page);
2715 set_page_dirty(page);
2716 if (pos+copied > inode->i_size) {
2717 i_size_write(inode, pos+copied);
2718 mark_inode_dirty(inode);
2719 }
2720
2721 unlock_page(page);
2722 put_page(page);
2723
2724 while (head) {
2725 bh = head;
2726 head = head->b_this_page;
2727 free_buffer_head(bh);
2728 }
2729
2730 return copied;
2731}
2732EXPORT_SYMBOL(nobh_write_end);
2733
/*
 * nobh_writepage() - based on block_write_full_page() except
 * that it tries to operate without attaching bufferheads to
 * the page.
 */
2739int nobh_writepage(struct page *page, get_block_t *get_block,
2740 struct writeback_control *wbc)
2741{
2742 struct inode * const inode = page->mapping->host;
2743 loff_t i_size = i_size_read(inode);
2744 const pgoff_t end_index = i_size >> PAGE_SHIFT;
2745 unsigned offset;
2746 int ret;
2747
2748
2749 if (page->index < end_index)
2750 goto out;
2751
2752
2753 offset = i_size & (PAGE_SIZE-1);
2754 if (page->index >= end_index+1 || !offset) {
2755
2756
2757
2758
2759
2760#if 0
2761
2762 if (page->mapping->a_ops->invalidatepage)
2763 page->mapping->a_ops->invalidatepage(page, offset);
2764#endif
2765 unlock_page(page);
2766 return 0;
2767 }
2768
2769
2770
2771
2772
2773
2774
2775
2776 zero_user_segment(page, offset, PAGE_SIZE);
2777out:
2778 ret = mpage_writepage(page, get_block, wbc);
2779 if (ret == -EAGAIN)
2780 ret = __block_write_full_page(inode, page, get_block, wbc,
2781 end_buffer_async_write);
2782 return ret;
2783}
2784EXPORT_SYMBOL(nobh_writepage);
2785
2786int nobh_truncate_page(struct address_space *mapping,
2787 loff_t from, get_block_t *get_block)
2788{
2789 pgoff_t index = from >> PAGE_SHIFT;
2790 unsigned offset = from & (PAGE_SIZE-1);
2791 unsigned blocksize;
2792 sector_t iblock;
2793 unsigned length, pos;
2794 struct inode *inode = mapping->host;
2795 struct page *page;
2796 struct buffer_head map_bh;
2797 int err;
2798
2799 blocksize = 1 << inode->i_blkbits;
2800 length = offset & (blocksize - 1);
2801
2802
2803 if (!length)
2804 return 0;
2805
2806 length = blocksize - length;
2807 iblock = (sector_t)index << (PAGE_SHIFT - inode->i_blkbits);
2808
2809 page = grab_cache_page(mapping, index);
2810 err = -ENOMEM;
2811 if (!page)
2812 goto out;
2813
2814 if (page_has_buffers(page)) {
2815has_buffers:
2816 unlock_page(page);
2817 put_page(page);
2818 return block_truncate_page(mapping, from, get_block);
2819 }
2820
2821
2822 pos = blocksize;
2823 while (offset >= pos) {
2824 iblock++;
2825 pos += blocksize;
2826 }
2827
2828 map_bh.b_size = blocksize;
2829 map_bh.b_state = 0;
2830 err = get_block(inode, iblock, &map_bh, 0);
2831 if (err)
2832 goto unlock;
2833
2834 if (!buffer_mapped(&map_bh))
2835 goto unlock;
2836
2837
2838 if (!PageUptodate(page)) {
2839 err = mapping->a_ops->readpage(NULL, page);
2840 if (err) {
2841 put_page(page);
2842 goto out;
2843 }
2844 lock_page(page);
2845 if (!PageUptodate(page)) {
2846 err = -EIO;
2847 goto unlock;
2848 }
2849 if (page_has_buffers(page))
2850 goto has_buffers;
2851 }
2852 zero_user(page, offset, length);
2853 set_page_dirty(page);
2854 err = 0;
2855
2856unlock:
2857 unlock_page(page);
2858 put_page(page);
2859out:
2860 return err;
2861}
2862EXPORT_SYMBOL(nobh_truncate_page);
2863
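/*
 * Zero the part of the block at @from that lies beyond the new end of file,
 * so that a later extension does not expose stale data.  The containing
 * buffer is read in first if it is mapped but not uptodate, then marked
 * dirty.
 */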
2864int block_truncate_page(struct address_space *mapping,
2865 loff_t from, get_block_t *get_block)
2866{
2867 pgoff_t index = from >> PAGE_SHIFT;
2868 unsigned offset = from & (PAGE_SIZE-1);
2869 unsigned blocksize;
2870 sector_t iblock;
2871 unsigned length, pos;
2872 struct inode *inode = mapping->host;
2873 struct page *page;
2874 struct buffer_head *bh;
2875 int err;
2876
2877 blocksize = 1 << inode->i_blkbits;
2878 length = offset & (blocksize - 1);
2879
2880
2881 if (!length)
2882 return 0;
2883
2884 length = blocksize - length;
2885 iblock = (sector_t)index << (PAGE_SHIFT - inode->i_blkbits);
2886
2887 page = grab_cache_page(mapping, index);
2888 err = -ENOMEM;
2889 if (!page)
2890 goto out;
2891
2892 if (!page_has_buffers(page))
2893 create_empty_buffers(page, blocksize, 0);
2894
2895
2896 bh = page_buffers(page);
2897 pos = blocksize;
2898 while (offset >= pos) {
2899 bh = bh->b_this_page;
2900 iblock++;
2901 pos += blocksize;
2902 }
2903
2904 err = 0;
2905 if (!buffer_mapped(bh)) {
2906 WARN_ON(bh->b_size != blocksize);
2907 err = get_block(inode, iblock, bh, 0);
2908 if (err)
2909 goto unlock;
2910
2911 if (!buffer_mapped(bh))
2912 goto unlock;
2913 }
2914
2915
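	/* Ok, it's mapped.  If the page as a whole is uptodate, so is this buffer. */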
2916 if (PageUptodate(page))
2917 set_buffer_uptodate(bh);
2918
2919 if (!buffer_uptodate(bh) && !buffer_delay(bh) && !buffer_unwritten(bh)) {
2920 err = -EIO;
2921 ll_rw_block(REQ_OP_READ, 0, 1, &bh);
2922 wait_on_buffer(bh);
2923
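		/* The read failed: bail out with -EIO. */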
2924 if (!buffer_uptodate(bh))
2925 goto unlock;
2926 }
2927
2928 zero_user(page, offset, length);
2929 mark_buffer_dirty(bh);
2930 err = 0;
2931
2932unlock:
2933 unlock_page(page);
2934 put_page(page);
2935out:
2936 return err;
2937}
2938EXPORT_SYMBOL(block_truncate_page);
2939
2940
2941
2942
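/*
 * The generic ->writepage implementation for buffer-backed address_spaces.
 *
 * A filesystem typically wraps it with its own block-mapping routine,
 * e.g. (sketch, assuming a foofs_get_block helper):
 *
 *	static int foofs_writepage(struct page *page,
 *				   struct writeback_control *wbc)
 *	{
 *		return block_write_full_page(page, foofs_get_block, wbc);
 *	}
 */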
2943int block_write_full_page(struct page *page, get_block_t *get_block,
2944 struct writeback_control *wbc)
2945{
2946 struct inode * const inode = page->mapping->host;
2947 loff_t i_size = i_size_read(inode);
2948 const pgoff_t end_index = i_size >> PAGE_SHIFT;
2949 unsigned offset;
2950
2951
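	/* Is the page fully inside i_size? */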
2952 if (page->index < end_index)
2953 return __block_write_full_page(inode, page, get_block, wbc,
2954 end_buffer_async_write);
2955
2956
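	/* Is the page fully outside i_size? (truncate in progress) */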
2957 offset = i_size & (PAGE_SIZE-1);
2958 if (page->index >= end_index+1 || !offset) {
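		/*
		 * The page may have dirty, unmapped buffers.  Invalidate it
		 * so they are made freeable and the page does not leak.
		 */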
2964 do_invalidatepage(page, 0, PAGE_SIZE);
2965 unlock_page(page);
2966 return 0;
2967 }
2968
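	/*
	 * The page straddles i_size.  It must be zeroed out on each and every
	 * writepage invocation because it may be mmapped: the part of the page
	 * beyond i_size is never written back to the file, so stale data must
	 * not be left in it.
	 */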
2976 zero_user_segment(page, offset, PAGE_SIZE);
2977 return __block_write_full_page(inode, page, get_block, wbc,
2978 end_buffer_async_write);
2979}
2980EXPORT_SYMBOL(block_write_full_page);
2981
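/*
 * Map a file-relative block to its on-disk block number by calling the
 * filesystem's get_block() without creating; typically used to implement
 * the ->bmap() address_space operation.
 */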
2982sector_t generic_block_bmap(struct address_space *mapping, sector_t block,
2983 get_block_t *get_block)
2984{
2985 struct buffer_head tmp;
2986 struct inode *inode = mapping->host;
2987 tmp.b_state = 0;
2988 tmp.b_blocknr = 0;
2989 tmp.b_size = 1 << inode->i_blkbits;
2990 get_block(inode, block, &tmp, 0);
2991 return tmp.b_blocknr;
2992}
2993EXPORT_SYMBOL(generic_block_bmap);
2994
2995static void end_bio_bh_io_sync(struct bio *bio)
2996{
2997 struct buffer_head *bh = bio->bi_private;
2998
2999 if (unlikely(bio_flagged(bio, BIO_QUIET)))
3000 set_bit(BH_Quiet, &bh->b_state);
3001
3002 bh->b_end_io(bh, !bio->bi_error);
3003 bio_put(bio);
3004}
3005
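/*
 * This allows us to do IO even on the odd last sectors of a device, even if
 * the block size is some multiple of the physical sector size.
 *
 * We'll just truncate the bio to the size of the device, and clear the end
 * of the buffer for reads.
 *
 * Truly out-of-range accesses are left alone and will turn into real I/O
 * errors further down the stack.
 */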
3018void guard_bio_eod(int op, struct bio *bio)
3019{
3020 sector_t maxsector;
3021 struct bio_vec *bvec = &bio->bi_io_vec[bio->bi_vcnt - 1];
3022 unsigned truncated_bytes;
3023
3024 maxsector = i_size_read(bio->bi_bdev->bd_inode) >> 9;
3025 if (!maxsector)
3026 return;
3027
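	/*
	 * If the *whole* IO is past the end of the device, let it through;
	 * the block layer will turn it into an EIO.
	 */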
3033 if (unlikely(bio->bi_iter.bi_sector >= maxsector))
3034 return;
3035
3036 maxsector -= bio->bi_iter.bi_sector;
3037 if (likely((bio->bi_iter.bi_size >> 9) <= maxsector))
3038 return;
3039
3040
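	/* The bio straddles the end of the device: work out the overlap. */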
3041 truncated_bytes = bio->bi_iter.bi_size - (maxsector << 9);
3042
3043
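	/* Chop the overlap off the bio and its last segment.. */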
3044 bio->bi_iter.bi_size -= truncated_bytes;
3045 bvec->bv_len -= truncated_bytes;
3046
3047
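	/* ..and zero the truncated tail of the page for reads. */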
3048 if (op == REQ_OP_READ) {
3049 zero_user(bvec->bv_page, bvec->bv_offset + bvec->bv_len,
3050 truncated_bytes);
3051 }
3052}
3053
3054static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh,
3055 unsigned long bio_flags, struct writeback_control *wbc)
3056{
3057 struct bio *bio;
3058
3059 BUG_ON(!buffer_locked(bh));
3060 BUG_ON(!buffer_mapped(bh));
3061 BUG_ON(!bh->b_end_io);
3062 BUG_ON(buffer_delay(bh));
3063 BUG_ON(buffer_unwritten(bh));
3064
3065
3066
3067
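	/*
	 * Clear a stale write error left over from a previous request
	 * before re-submitting this buffer for a write.
	 */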
3068 if (test_set_buffer_req(bh) && (op == REQ_OP_WRITE))
3069 clear_buffer_write_io_error(bh);
3070
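	/*
	 * From here on down it's all bio: set up the mapping and let
	 * submit_bio() / generic_make_request() take over.
	 */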
3075 bio = bio_alloc(GFP_NOIO, 1);
3076
3077 if (wbc) {
3078 wbc_init_bio(wbc, bio);
3079 wbc_account_io(wbc, bh->b_page, bh->b_size);
3080 }
3081
3082 bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9);
3083 bio->bi_bdev = bh->b_bdev;
3084
3085 bio_add_page(bio, bh->b_page, bh->b_size, bh_offset(bh));
3086 BUG_ON(bio->bi_iter.bi_size != bh->b_size);
3087
3088 bio->bi_end_io = end_bio_bh_io_sync;
3089 bio->bi_private = bh;
3090 bio->bi_flags |= bio_flags;
3091
3092
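	/* Take care of bh's that straddle the end of the device. */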
3093 guard_bio_eod(op, bio);
3094
3095 if (buffer_meta(bh))
3096 op_flags |= REQ_META;
3097 if (buffer_prio(bh))
3098 op_flags |= REQ_PRIO;
3099 bio_set_op_attrs(bio, op, op_flags);
3100
3101 submit_bio(bio);
3102 return 0;
3103}
3104
3105int _submit_bh(int op, int op_flags, struct buffer_head *bh,
3106 unsigned long bio_flags)
3107{
3108 return submit_bh_wbc(op, op_flags, bh, bio_flags, NULL);
3109}
3110EXPORT_SYMBOL_GPL(_submit_bh);
3111
3112int submit_bh(int op, int op_flags, struct buffer_head *bh)
3113{
3114 return submit_bh_wbc(op, op_flags, bh, 0, NULL);
3115}
3116EXPORT_SYMBOL(submit_bh);
3117
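/**
 * ll_rw_block: low-level access to block devices
 * @op: whether to %READ or %WRITE
 * @op_flags: request flags modifying the I/O behaviour
 * @nr: number of &struct buffer_heads in the array
 * @bhs: array of pointers to &struct buffer_head
 *
 * ll_rw_block() takes an array of pointers to &struct buffer_heads and
 * requests an I/O operation on them, either a %READ or a %WRITE.
 *
 * This function drops any buffer that it cannot get a lock on (with the
 * BH_Lock state bit), any buffer that appears to be clean when doing a
 * write request, and any buffer that appears to be up-to-date when doing
 * a read request.  Buffers that are actually submitted get a simple sync
 * completion handler (end_buffer_write_sync / end_buffer_read_sync) which
 * marks the buffer up-to-date where appropriate, unlocks it and wakes any
 * waiters.
 *
 * A typical use (sketch): start a read on several buffers and wait for
 * the last one:
 *
 *	ll_rw_block(REQ_OP_READ, 0, nr, bhs);
 *	wait_on_buffer(bhs[nr - 1]);
 */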
3144void ll_rw_block(int op, int op_flags, int nr, struct buffer_head *bhs[])
3145{
3146 int i;
3147
3148 for (i = 0; i < nr; i++) {
3149 struct buffer_head *bh = bhs[i];
3150
3151 if (!trylock_buffer(bh))
3152 continue;
3153 if (op == WRITE) {
3154 if (test_clear_buffer_dirty(bh)) {
3155 bh->b_end_io = end_buffer_write_sync;
3156 get_bh(bh);
3157 submit_bh(op, op_flags, bh);
3158 continue;
3159 }
3160 } else {
3161 if (!buffer_uptodate(bh)) {
3162 bh->b_end_io = end_buffer_read_sync;
3163 get_bh(bh);
3164 submit_bh(op, op_flags, bh);
3165 continue;
3166 }
3167 }
3168 unlock_buffer(bh);
3169 }
3170}
3171EXPORT_SYMBOL(ll_rw_block);
3172
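/*
 * Start an asynchronous write of @bh, with the given op_flags, if (and only
 * if) it is dirty.  The dirty bit is cleared before submission; the caller
 * keeps its own reference.
 */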
3173void write_dirty_buffer(struct buffer_head *bh, int op_flags)
3174{
3175 lock_buffer(bh);
3176 if (!test_clear_buffer_dirty(bh)) {
3177 unlock_buffer(bh);
3178 return;
3179 }
3180 bh->b_end_io = end_buffer_write_sync;
3181 get_bh(bh);
3182 submit_bh(REQ_OP_WRITE, op_flags, bh);
3183}
3184EXPORT_SYMBOL(write_dirty_buffer);
3185
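/*
 * For a data-integrity writeout we need to wait upon any in-progress I/O
 * and then start new I/O and wait upon that.  The caller must hold a
 * reference on the buffer_head.
 */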
3191int __sync_dirty_buffer(struct buffer_head *bh, int op_flags)
3192{
3193 int ret = 0;
3194
3195 WARN_ON(atomic_read(&bh->b_count) < 1);
3196 lock_buffer(bh);
3197 if (test_clear_buffer_dirty(bh)) {
3198 get_bh(bh);
3199 bh->b_end_io = end_buffer_write_sync;
3200 ret = submit_bh(REQ_OP_WRITE, op_flags, bh);
3201 wait_on_buffer(bh);
3202 if (!ret && !buffer_uptodate(bh))
3203 ret = -EIO;
3204 } else {
3205 unlock_buffer(bh);
3206 }
3207 return ret;
3208}
3209EXPORT_SYMBOL(__sync_dirty_buffer);
3210
3211int sync_dirty_buffer(struct buffer_head *bh)
3212{
3213 return __sync_dirty_buffer(bh, WRITE_SYNC);
3214}
3215EXPORT_SYMBOL(sync_dirty_buffer);
3216
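/*
 * try_to_free_buffers() checks whether all the buffers on this particular
 * page are unused, and releases them if so.
 *
 * Exclusion against try_to_free_buffers may be obtained by either locking
 * the page or by holding its mapping's private_lock.
 *
 * If the page is dirty but all the buffers are clean then the page must be
 * marked clean as well: the page may be against a block device, and a later
 * reattachment of buffers to a dirty page would set *all* buffers dirty and
 * corrupt filesystem data on the same device.
 *
 * try_to_free_buffers() is non-blocking.
 */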
3237static inline int buffer_busy(struct buffer_head *bh)
3238{
3239 return atomic_read(&bh->b_count) |
3240 (bh->b_state & ((1 << BH_Dirty) | (1 << BH_Lock)));
3241}
3242
3243static int
3244drop_buffers(struct page *page, struct buffer_head **buffers_to_free)
3245{
3246 struct buffer_head *head = page_buffers(page);
3247 struct buffer_head *bh;
3248
3249 bh = head;
3250 do {
3251 if (buffer_write_io_error(bh) && page->mapping)
3252 mapping_set_error(page->mapping, -EIO);
3253 if (buffer_busy(bh))
3254 goto failed;
3255 bh = bh->b_this_page;
3256 } while (bh != head);
3257
3258 do {
3259 struct buffer_head *next = bh->b_this_page;
3260
3261 if (bh->b_assoc_map)
3262 __remove_assoc_queue(bh);
3263 bh = next;
3264 } while (bh != head);
3265 *buffers_to_free = head;
3266 __clear_page_buffers(page);
3267 return 1;
3268failed:
3269 return 0;
3270}
3271
3272int try_to_free_buffers(struct page *page)
3273{
3274 struct address_space * const mapping = page->mapping;
3275 struct buffer_head *buffers_to_free = NULL;
3276 int ret = 0;
3277
3278 BUG_ON(!PageLocked(page));
3279 if (PageWriteback(page))
3280 return 0;
3281
3282 if (mapping == NULL) {
3283 ret = drop_buffers(page, &buffers_to_free);
3284 goto out;
3285 }
3286
3287 spin_lock(&mapping->private_lock);
3288 ret = drop_buffers(page, &buffers_to_free);
3289
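	/*
	 * If the filesystem writes its buffers by hand (eg ext3) then we
	 * can have clean buffers against a dirty page.  Clean the page
	 * here, otherwise the VM will never notice that the filesystem
	 * did any IO at all.
	 *
	 * private_lock must be held over this entire operation in order
	 * to synchronise against __set_page_dirty_buffers and prevent the
	 * dirty bit from being lost.
	 */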
3304 if (ret)
3305 cancel_dirty_page(page);
3306 spin_unlock(&mapping->private_lock);
3307out:
3308 if (buffers_to_free) {
3309 struct buffer_head *bh = buffers_to_free;
3310
3311 do {
3312 struct buffer_head *next = bh->b_this_page;
3313 free_buffer_head(bh);
3314 bh = next;
3315 } while (bh != buffers_to_free);
3316 }
3317 return ret;
3318}
3319EXPORT_SYMBOL(try_to_free_buffers);
3320
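/*
 * There are no bdflush tunables left.  But distributions are still running
 * obsolete flush daemons, so we terminate them here.  bdflush() is obsolete;
 * the per-bdi flusher threads fully replace it.
 */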
3328SYSCALL_DEFINE2(bdflush, int, func, long, data)
3329{
3330 static int msg_count;
3331
3332 if (!capable(CAP_SYS_ADMIN))
3333 return -EPERM;
3334
3335 if (msg_count < 5) {
3336 msg_count++;
3337 printk(KERN_INFO
3338 "warning: process `%s' used the obsolete bdflush"
3339 " system call\n", current->comm);
3340 printk(KERN_INFO "Fix your initscripts?\n");
3341 }
3342
3343 if (func == 1)
3344 do_exit(0);
3345 return 0;
3346}
3347
3348
3349
3350
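/*
 * Buffer-head allocation.
 */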
3351static struct kmem_cache *bh_cachep __read_mostly;
3352
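/*
 * Once the number of buffer_heads in the machine exceeds this level,
 * buffer_heads_over_limit is set and page reclaim starts stripping
 * buffer_heads during writeback.
 */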
3357static unsigned long max_buffer_heads;
3358
3359int buffer_heads_over_limit;
3360
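/*
 * Per-CPU accounting of live buffer_heads.  @nr is the number allocated on
 * this CPU; @ratelimit throttles how often recalc_bh_state() sums the
 * per-CPU counters (once every 4096 events).
 */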
3361struct bh_accounting {
3362 int nr;
3363 int ratelimit;
3364};
3365
3366static DEFINE_PER_CPU(struct bh_accounting, bh_accounting) = {0, 0};
3367
3368static void recalc_bh_state(void)
3369{
3370 int i;
3371 int tot = 0;
3372
3373 if (__this_cpu_inc_return(bh_accounting.ratelimit) - 1 < 4096)
3374 return;
3375 __this_cpu_write(bh_accounting.ratelimit, 0);
3376 for_each_online_cpu(i)
3377 tot += per_cpu(bh_accounting, i).nr;
3378 buffer_heads_over_limit = (tot > max_buffer_heads);
3379}
3380
3381struct buffer_head *alloc_buffer_head(gfp_t gfp_flags)
3382{
3383 struct buffer_head *ret = kmem_cache_zalloc(bh_cachep, gfp_flags);
3384 if (ret) {
3385 INIT_LIST_HEAD(&ret->b_assoc_buffers);
3386 preempt_disable();
3387 __this_cpu_inc(bh_accounting.nr);
3388 recalc_bh_state();
3389 preempt_enable();
3390 }
3391 return ret;
3392}
3393EXPORT_SYMBOL(alloc_buffer_head);
3394
3395void free_buffer_head(struct buffer_head *bh)
3396{
3397 BUG_ON(!list_empty(&bh->b_assoc_buffers));
3398 kmem_cache_free(bh_cachep, bh);
3399 preempt_disable();
3400 __this_cpu_dec(bh_accounting.nr);
3401 recalc_bh_state();
3402 preempt_enable();
3403}
3404EXPORT_SYMBOL(free_buffer_head);
3405
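/*
 * CPU hotplug: when a CPU goes away, drop the references held in its
 * buffer_head LRU and fold its buffer_head count into the current CPU's.
 */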
3406static void buffer_exit_cpu(int cpu)
3407{
3408 int i;
3409 struct bh_lru *b = &per_cpu(bh_lrus, cpu);
3410
3411 for (i = 0; i < BH_LRU_SIZE; i++) {
3412 brelse(b->bhs[i]);
3413 b->bhs[i] = NULL;
3414 }
3415 this_cpu_add(bh_accounting.nr, per_cpu(bh_accounting, cpu).nr);
3416 per_cpu(bh_accounting, cpu).nr = 0;
3417}
3418
3419static int buffer_cpu_notify(struct notifier_block *self,
3420 unsigned long action, void *hcpu)
3421{
3422 if (action == CPU_DEAD || action == CPU_DEAD_FROZEN)
3423 buffer_exit_cpu((unsigned long)hcpu);
3424 return NOTIFY_OK;
3425}
3426
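/**
 * bh_uptodate_or_lock - Test whether the buffer is uptodate
 * @bh: struct buffer_head
 *
 * Return true if the buffer is up-to-date and false,
 * with the buffer locked, if not.
 */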
3434int bh_uptodate_or_lock(struct buffer_head *bh)
3435{
3436 if (!buffer_uptodate(bh)) {
3437 lock_buffer(bh);
3438 if (!buffer_uptodate(bh))
3439 return 0;
3440 unlock_buffer(bh);
3441 }
3442 return 1;
3443}
3444EXPORT_SYMBOL(bh_uptodate_or_lock);
3445
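/**
 * bh_submit_read - Submit a locked buffer for reading
 * @bh: struct buffer_head
 *
 * The buffer must be locked on entry.  If it is already uptodate it is
 * simply unlocked.  Returns zero on success and -EIO on error.
 */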
3452int bh_submit_read(struct buffer_head *bh)
3453{
3454 BUG_ON(!buffer_locked(bh));
3455
3456 if (buffer_uptodate(bh)) {
3457 unlock_buffer(bh);
3458 return 0;
3459 }
3460
3461 get_bh(bh);
3462 bh->b_end_io = end_buffer_read_sync;
3463 submit_bh(REQ_OP_READ, 0, bh);
3464 wait_on_buffer(bh);
3465 if (buffer_uptodate(bh))
3466 return 0;
3467 return -EIO;
3468}
3469EXPORT_SYMBOL(bh_submit_read);
3470
3471void __init buffer_init(void)
3472{
3473 unsigned long nrpages;
3474
3475 bh_cachep = kmem_cache_create("buffer_head",
3476 sizeof(struct buffer_head), 0,
3477 (SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|
3478 SLAB_MEM_SPREAD),
3479 NULL);
3480
3481
3482
3483
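	/* Limit buffer_head occupancy to ~10% of low memory. */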
3484 nrpages = (nr_free_buffer_pages() * 10) / 100;
3485 max_buffer_heads = nrpages * (PAGE_SIZE / sizeof(struct buffer_head));
3486 hotcpu_notifier(buffer_cpu_notify, 0);
3487}
3488