/*
 *  linux/fs/buffer.c
 *
 *  Copyright (C) 1991, 1992, 2002  Linus Torvalds
 */
#include <linux/kernel.h>
#include <linux/syscalls.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/percpu.h>
#include <linux/slab.h>
#include <linux/capability.h>
#include <linux/blkdev.h>
#include <linux/file.h>
#include <linux/quotaops.h>
#include <linux/highmem.h>
#include <linux/export.h>
#include <linux/backing-dev.h>
#include <linux/writeback.h>
#include <linux/hash.h>
#include <linux/suspend.h>
#include <linux/buffer_head.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/bio.h>
#include <linux/notifier.h>
#include <linux/cpu.h>
#include <linux/bitops.h>
#include <linux/mpage.h>
#include <linux/bit_spinlock.h>
#include <trace/events/block.h>

static int fsync_buffers_list(spinlock_t *lock, struct list_head *list);
static int submit_bh_wbc(int rw, struct buffer_head *bh,
			 unsigned long bio_flags,
			 struct writeback_control *wbc);

#define BH_ENTRY(list) list_entry((list), struct buffer_head, b_assoc_buffers)

54void init_buffer(struct buffer_head *bh, bh_end_io_t *handler, void *private)
55{
56 bh->b_end_io = handler;
57 bh->b_private = private;
58}
59EXPORT_SYMBOL(init_buffer);
60
61inline void touch_buffer(struct buffer_head *bh)
62{
63 trace_block_touch_buffer(bh);
64 mark_page_accessed(bh->b_page);
65}
66EXPORT_SYMBOL(touch_buffer);
67
68void __lock_buffer(struct buffer_head *bh)
69{
70 wait_on_bit_lock_io(&bh->b_state, BH_Lock, TASK_UNINTERRUPTIBLE);
71}
72EXPORT_SYMBOL(__lock_buffer);
73
74void unlock_buffer(struct buffer_head *bh)
75{
76 clear_bit_unlock(BH_Lock, &bh->b_state);
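	/*
	 * smp_mb__after_atomic() orders the clearing of BH_Lock before the
	 * wait-queue check inside wake_up_bit(), so a concurrent waiter
	 * cannot miss the wakeup.
	 */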
77 smp_mb__after_atomic();
78 wake_up_bit(&bh->b_state, BH_Lock);
79}
80EXPORT_SYMBOL(unlock_buffer);

/*
 * Returns if the page has dirty or writeback buffers. If all the buffers
 * are unlocked and clean then the PageDirty information is stale. If
 * any of the pages are locked, it is assumed they are locked for IO.
 */
87void buffer_check_dirty_writeback(struct page *page,
88 bool *dirty, bool *writeback)
89{
90 struct buffer_head *head, *bh;
91 *dirty = false;
92 *writeback = false;
93
94 BUG_ON(!PageLocked(page));
95
96 if (!page_has_buffers(page))
97 return;
98
99 if (PageWriteback(page))
100 *writeback = true;
101
102 head = page_buffers(page);
103 bh = head;
104 do {
105 if (buffer_locked(bh))
106 *writeback = true;
107
108 if (buffer_dirty(bh))
109 *dirty = true;
110
111 bh = bh->b_this_page;
112 } while (bh != head);
113}
114EXPORT_SYMBOL(buffer_check_dirty_writeback);

/*
 * Block until a buffer comes unlocked.  This doesn't stop it
 * from becoming locked again - you have to lock it yourself
 * if you want to preserve its state.
 */
121void __wait_on_buffer(struct buffer_head * bh)
122{
123 wait_on_bit_io(&bh->b_state, BH_Lock, TASK_UNINTERRUPTIBLE);
124}
125EXPORT_SYMBOL(__wait_on_buffer);
126
127static void
128__clear_page_buffers(struct page *page)
129{
130 ClearPagePrivate(page);
131 set_page_private(page, 0);
132 page_cache_release(page);
133}
134
135static void buffer_io_error(struct buffer_head *bh, char *msg)
136{
137 if (!test_bit(BH_Quiet, &bh->b_state))
138 printk_ratelimited(KERN_ERR
139 "Buffer I/O error on dev %pg, logical block %llu%s\n",
140 bh->b_bdev, (unsigned long long)bh->b_blocknr, msg);
141}

/*
 * End-of-IO handler helper function which does not touch the bh after
 * unlocking it.
 * Note: unlock_buffer() sort-of does touch the bh after unlocking it, but
 * a race there is benign: unlock_buffer() only uses the bh's address for
 * hashing after unlocking the buffer, so it doesn't actually touch the bh
 * itself.
 */
151static void __end_buffer_read_notouch(struct buffer_head *bh, int uptodate)
152{
153 if (uptodate) {
154 set_buffer_uptodate(bh);
155 } else {
		/* This happens, due to failed read-ahead attempts. */
157 clear_buffer_uptodate(bh);
158 }
159 unlock_buffer(bh);
160}

/*
 * Default synchronous end-of-IO handler.  Just mark it up-to-date and
 * unlock the buffer.  This is what ll_rw_block uses too.
 */
166void end_buffer_read_sync(struct buffer_head *bh, int uptodate)
167{
168 __end_buffer_read_notouch(bh, uptodate);
169 put_bh(bh);
170}
171EXPORT_SYMBOL(end_buffer_read_sync);
172
173void end_buffer_write_sync(struct buffer_head *bh, int uptodate)
174{
175 if (uptodate) {
176 set_buffer_uptodate(bh);
177 } else {
178 buffer_io_error(bh, ", lost sync page write");
179 set_buffer_write_io_error(bh);
180 clear_buffer_uptodate(bh);
181 }
182 unlock_buffer(bh);
183 put_bh(bh);
184}
185EXPORT_SYMBOL(end_buffer_write_sync);

/*
 * Various filesystems appear to want __find_get_block to be non-blocking.
 * But it's the page lock which protects the buffers.  To get around this,
 * we get exclusion from try_to_free_buffers with the blockdev mapping's
 * private_lock.
 *
 * Hack idea: for the blockdev mapping, private_lock contention
 * may be quite high.  This code could TryLock the page, and if that
 * succeeds, there is no need to take private_lock.
 */
198static struct buffer_head *
199__find_get_block_slow(struct block_device *bdev, sector_t block)
200{
201 struct inode *bd_inode = bdev->bd_inode;
202 struct address_space *bd_mapping = bd_inode->i_mapping;
203 struct buffer_head *ret = NULL;
204 pgoff_t index;
205 struct buffer_head *bh;
206 struct buffer_head *head;
207 struct page *page;
208 int all_mapped = 1;
209
210 index = block >> (PAGE_CACHE_SHIFT - bd_inode->i_blkbits);
211 page = find_get_page_flags(bd_mapping, index, FGP_ACCESSED);
212 if (!page)
213 goto out;
214
215 spin_lock(&bd_mapping->private_lock);
216 if (!page_has_buffers(page))
217 goto out_unlock;
218 head = page_buffers(page);
219 bh = head;
220 do {
221 if (!buffer_mapped(bh))
222 all_mapped = 0;
223 else if (bh->b_blocknr == block) {
224 ret = bh;
225 get_bh(bh);
226 goto out_unlock;
227 }
228 bh = bh->b_this_page;
229 } while (bh != head);

	/* we might be here because some of the buffers on this page are
	 * not mapped.  This is due to various races between
	 * file io on the block device and getblk.  It gets dealt with
	 * elsewhere, don't buffer_error if we had some unmapped buffers
	 */
236 if (all_mapped) {
237 printk("__find_get_block_slow() failed. "
238 "block=%llu, b_blocknr=%llu\n",
239 (unsigned long long)block,
240 (unsigned long long)bh->b_blocknr);
241 printk("b_state=0x%08lx, b_size=%zu\n",
242 bh->b_state, bh->b_size);
243 printk("device %pg blocksize: %d\n", bdev,
244 1 << bd_inode->i_blkbits);
245 }
246out_unlock:
247 spin_unlock(&bd_mapping->private_lock);
248 page_cache_release(page);
249out:
250 return ret;
251}

/*
 * Kick the writeback threads then try to free up some ZONE_NORMAL memory.
 */
256static void free_more_memory(void)
257{
258 struct zone *zone;
259 int nid;
260
261 wakeup_flusher_threads(1024, WB_REASON_FREE_MORE_MEM);
262 yield();
263
264 for_each_online_node(nid) {
265 (void)first_zones_zonelist(node_zonelist(nid, GFP_NOFS),
266 gfp_zone(GFP_NOFS), NULL,
267 &zone);
268 if (zone)
269 try_to_free_pages(node_zonelist(nid, GFP_NOFS), 0,
270 GFP_NOFS, NULL);
271 }
272}

/*
 * I/O completion handler for block_read_full_page() - pages
 * which come unlocked at the end of I/O.
 */
278static void end_buffer_async_read(struct buffer_head *bh, int uptodate)
279{
280 unsigned long flags;
281 struct buffer_head *first;
282 struct buffer_head *tmp;
283 struct page *page;
284 int page_uptodate = 1;
285
286 BUG_ON(!buffer_async_read(bh));
287
288 page = bh->b_page;
289 if (uptodate) {
290 set_buffer_uptodate(bh);
291 } else {
292 clear_buffer_uptodate(bh);
293 buffer_io_error(bh, ", async page read");
294 SetPageError(page);
295 }

	/*
	 * Be _very_ careful from here on. Bad things can happen if
	 * two buffer heads end IO at the same time and both
	 * decide that the page is now completely done.
	 */
302 first = page_buffers(page);
303 local_irq_save(flags);
304 bit_spin_lock(BH_Uptodate_Lock, &first->b_state);
305 clear_buffer_async_read(bh);
306 unlock_buffer(bh);
307 tmp = bh;
308 do {
309 if (!buffer_uptodate(tmp))
310 page_uptodate = 0;
311 if (buffer_async_read(tmp)) {
312 BUG_ON(!buffer_locked(tmp));
313 goto still_busy;
314 }
315 tmp = tmp->b_this_page;
316 } while (tmp != bh);
317 bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
318 local_irq_restore(flags);

	/*
	 * If none of the buffers had errors and they are all
	 * uptodate then we can set the page uptodate.
	 */
324 if (page_uptodate && !PageError(page))
325 SetPageUptodate(page);
326 unlock_page(page);
327 return;
328
329still_busy:
330 bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
331 local_irq_restore(flags);
332 return;
333}

/*
 * Completion handler for block_write_full_page() - pages which are unlocked
 * during I/O, and which have PageWriteback cleared upon I/O completion.
 */
339void end_buffer_async_write(struct buffer_head *bh, int uptodate)
340{
341 unsigned long flags;
342 struct buffer_head *first;
343 struct buffer_head *tmp;
344 struct page *page;
345
346 BUG_ON(!buffer_async_write(bh));
347
348 page = bh->b_page;
349 if (uptodate) {
350 set_buffer_uptodate(bh);
351 } else {
352 buffer_io_error(bh, ", lost async page write");
353 set_bit(AS_EIO, &page->mapping->flags);
354 set_buffer_write_io_error(bh);
355 clear_buffer_uptodate(bh);
356 SetPageError(page);
357 }
358
359 first = page_buffers(page);
360 local_irq_save(flags);
361 bit_spin_lock(BH_Uptodate_Lock, &first->b_state);
362
363 clear_buffer_async_write(bh);
364 unlock_buffer(bh);
365 tmp = bh->b_this_page;
366 while (tmp != bh) {
367 if (buffer_async_write(tmp)) {
368 BUG_ON(!buffer_locked(tmp));
369 goto still_busy;
370 }
371 tmp = tmp->b_this_page;
372 }
373 bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
374 local_irq_restore(flags);
375 end_page_writeback(page);
376 return;
377
378still_busy:
379 bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
380 local_irq_restore(flags);
381 return;
382}
383EXPORT_SYMBOL(end_buffer_async_write);

/*
 * If a page's buffers are under async readin (end_buffer_async_read
 * completion) then there is a possibility that another thread of
 * control could lock one of the buffers after it has completed
 * but while some of the other buffers have not completed.  This
 * locked buffer would confuse end_buffer_async_read() into not unlocking
 * the page.  So the absence of BH_Async_Read tells end_buffer_async_read()
 * that this buffer is not under async I/O.
 *
 * The page comes unlocked when it has no locked buffer_async buffers
 * left.
 *
 * PageLocked prevents anyone starting new async I/O reads any of
 * the buffers.
 *
 * PageWriteback is used to prevent simultaneous writeout of the same
 * page.
 *
 * PageLocked prevents anyone from starting writeback of a page which is
 * under read I/O (PageWriteback is only ever set against a locked page).
 */
406static void mark_buffer_async_read(struct buffer_head *bh)
407{
408 bh->b_end_io = end_buffer_async_read;
409 set_buffer_async_read(bh);
410}
411
412static void mark_buffer_async_write_endio(struct buffer_head *bh,
413 bh_end_io_t *handler)
414{
415 bh->b_end_io = handler;
416 set_buffer_async_write(bh);
417}
418
419void mark_buffer_async_write(struct buffer_head *bh)
420{
421 mark_buffer_async_write_endio(bh, end_buffer_async_write);
422}
423EXPORT_SYMBOL(mark_buffer_async_write);

/*
 * fs/buffer.c contains helper functions for buffer-backed address space's
 * fsync functions.  A common requirement for buffer-based filesystems is
 * that certain data from the backing blockdev needs to be written out for
 * a successful fsync().  For example, ext2 indirect blocks need to be
 * written back and waited upon before fsync() returns.
 *
 * The functions mark_buffer_dirty_inode(), fsync_inode_buffers(),
 * inode_has_buffers() and invalidate_inode_buffers() are provided for the
 * management of a list of dependent buffers at ->i_mapping->private_list.
 *
 * Locking is a little subtle: try_to_free_buffers() will remove buffers
 * from their controlling inode's queue when they are being freed.  But
 * try_to_free_buffers() will be operating against the *blockdev* mapping
 * at the time, not against the S_ISREG file which depends on those buffers.
 * So the locking for private_list is via the private_lock in the address_space
 * which backs the buffers.  Which is different from the address_space against
 * which the buffers are listed.  So for a particular address_space,
 * mapping->private_lock does *not* protect mapping->private_list!  In fact,
 * mapping->private_list will always be protected by the backing blockdev's
 * ->private_lock.
 *
 * Which introduces a requirement: all buffers on an address_space's
 * ->private_list must be from the same address_space: the blockdev's.
 *
 * address_spaces which do not place buffers at ->private_list via these
 * utility functions are free to use private_lock and private_list for
 * whatever they want.  The only requirement is that list_empty(private_list)
 * be true at clear_inode() time.
 */
478static void __remove_assoc_queue(struct buffer_head *bh)
479{
480 list_del_init(&bh->b_assoc_buffers);
481 WARN_ON(!bh->b_assoc_map);
482 if (buffer_write_io_error(bh))
483 set_bit(AS_EIO, &bh->b_assoc_map->flags);
484 bh->b_assoc_map = NULL;
485}
486
487int inode_has_buffers(struct inode *inode)
488{
489 return !list_empty(&inode->i_data.private_list);
490}

/*
 * osync is designed to support O_SYNC io.  It waits synchronously for
 * all already-submitted IO to complete, but does not queue any new
 * writes to the disk.
 *
 * To do O_SYNC writes, just queue the buffer writes with ll_rw_block as
 * you dirty the buffers, and then use osync_inode_buffers to wait for
 * completion.  Any other dirty buffers which are not yet queued for
 * write will not be flushed to disk by the osync.
 */
502static int osync_buffers_list(spinlock_t *lock, struct list_head *list)
503{
504 struct buffer_head *bh;
505 struct list_head *p;
506 int err = 0;
507
508 spin_lock(lock);
509repeat:
510 list_for_each_prev(p, list) {
511 bh = BH_ENTRY(p);
512 if (buffer_locked(bh)) {
513 get_bh(bh);
514 spin_unlock(lock);
515 wait_on_buffer(bh);
516 if (!buffer_uptodate(bh))
517 err = -EIO;
518 brelse(bh);
519 spin_lock(lock);
520 goto repeat;
521 }
522 }
523 spin_unlock(lock);
524 return err;
525}
526
527static void do_thaw_one(struct super_block *sb, void *unused)
528{
529 while (sb->s_bdev && !thaw_bdev(sb->s_bdev, sb))
530 printk(KERN_WARNING "Emergency Thaw on %pg\n", sb->s_bdev);
531}
532
533static void do_thaw_all(struct work_struct *work)
534{
535 iterate_supers(do_thaw_one, NULL);
536 kfree(work);
537 printk(KERN_WARNING "Emergency Thaw complete\n");
538}

/**
 * emergency_thaw_all -- forcibly thaw every frozen filesystem
 *
 * Used for emergency unfreeze of all filesystems via SysRq
 */
545void emergency_thaw_all(void)
546{
547 struct work_struct *work;
548
549 work = kmalloc(sizeof(*work), GFP_ATOMIC);
550 if (work) {
551 INIT_WORK(work, do_thaw_all);
552 schedule_work(work);
553 }
554}

/**
 * sync_mapping_buffers - write out & wait upon a mapping's "associated" buffers
 * @mapping: the mapping which wants those buffers written
 *
 * Starts I/O against the buffers at mapping->private_list, and waits upon
 * that I/O.
 *
 * Basically, this is a convenience function for fsync().
 * @mapping is a file or directory which needs those buffers to be written for
 * a successful fsync().
 */
567int sync_mapping_buffers(struct address_space *mapping)
568{
569 struct address_space *buffer_mapping = mapping->private_data;
570
571 if (buffer_mapping == NULL || list_empty(&mapping->private_list))
572 return 0;
573
574 return fsync_buffers_list(&buffer_mapping->private_lock,
575 &mapping->private_list);
576}
577EXPORT_SYMBOL(sync_mapping_buffers);

/*
 * Called when we've recently written block `bblock', and it is known that
 * `bblock' was for a buffer_boundary() buffer.  This means that the block at
 * `bblock + 1' is probably a dirty indirect block.  Hunt it down and, if it's
 * dirty, schedule it for IO.  So that indirects merge nicely with their data.
 */
585void write_boundary_block(struct block_device *bdev,
586 sector_t bblock, unsigned blocksize)
587{
588 struct buffer_head *bh = __find_get_block(bdev, bblock + 1, blocksize);
589 if (bh) {
590 if (buffer_dirty(bh))
591 ll_rw_block(WRITE, 1, &bh);
592 put_bh(bh);
593 }
594}
595
596void mark_buffer_dirty_inode(struct buffer_head *bh, struct inode *inode)
597{
598 struct address_space *mapping = inode->i_mapping;
599 struct address_space *buffer_mapping = bh->b_page->mapping;
600
601 mark_buffer_dirty(bh);
602 if (!mapping->private_data) {
603 mapping->private_data = buffer_mapping;
604 } else {
605 BUG_ON(mapping->private_data != buffer_mapping);
606 }
607 if (!bh->b_assoc_map) {
608 spin_lock(&buffer_mapping->private_lock);
609 list_move_tail(&bh->b_assoc_buffers,
610 &mapping->private_list);
611 bh->b_assoc_map = mapping;
612 spin_unlock(&buffer_mapping->private_lock);
613 }
614}
615EXPORT_SYMBOL(mark_buffer_dirty_inode);

/*
 * Mark the page dirty, and set it dirty in the radix tree, and mark the inode
 * dirty.
 *
 * If warn is true, then emit a warning if the page is not uptodate and has
 * not been truncated.
 *
 * The caller must hold mem_cgroup_begin_page_stat() lock.
 */
626static void __set_page_dirty(struct page *page, struct address_space *mapping,
627 struct mem_cgroup *memcg, int warn)
628{
629 unsigned long flags;
630
631 spin_lock_irqsave(&mapping->tree_lock, flags);
632 if (page->mapping) {
633 WARN_ON_ONCE(warn && !PageUptodate(page));
634 account_page_dirtied(page, mapping, memcg);
635 radix_tree_tag_set(&mapping->page_tree,
636 page_index(page), PAGECACHE_TAG_DIRTY);
637 }
638 spin_unlock_irqrestore(&mapping->tree_lock, flags);
639}

/*
 * Add a page to the dirty page list.
 *
 * It is a sad fact of life that this function is called from several places
 * deeply under spinlocking.  It may not sleep.
 *
 * If the page has buffers, the uptodate buffers are set dirty, to preserve
 * dirty-state coherency between the page and the buffers.  If the page does
 * not have buffers then when they are later attached they will all be set
 * dirty.
 *
 * The buffers are dirtied before the page is dirtied.  There's a small race
 * window in which a writepage caller may see the page cleanness but not the
 * buffer dirtiness.  That's fine.  If this code were to set the page dirty
 * before the buffers, a concurrent writepage caller could clear the page dirty
 * bit, see a bunch of clean buffers and we'd end up with dirty buffers/clean
 * page on the dirty page list.
 *
 * We use private_lock to lock against try_to_free_buffers while using the
 * page's buffer list.  Also use this to protect against clean buffers being
 * added to the page after it was set dirty.
 */
666int __set_page_dirty_buffers(struct page *page)
667{
668 int newly_dirty;
669 struct mem_cgroup *memcg;
670 struct address_space *mapping = page_mapping(page);
671
672 if (unlikely(!mapping))
673 return !TestSetPageDirty(page);
674
675 spin_lock(&mapping->private_lock);
676 if (page_has_buffers(page)) {
677 struct buffer_head *head = page_buffers(page);
678 struct buffer_head *bh = head;
679
680 do {
681 set_buffer_dirty(bh);
682 bh = bh->b_this_page;
683 } while (bh != head);
684 }

	/*
	 * Lock out page->mem_cgroup migration to keep PageDirty
	 * synchronized with per-memcg dirty page counters.
	 */
689 memcg = mem_cgroup_begin_page_stat(page);
690 newly_dirty = !TestSetPageDirty(page);
691 spin_unlock(&mapping->private_lock);
692
693 if (newly_dirty)
694 __set_page_dirty(page, mapping, memcg, 1);
695
696 mem_cgroup_end_page_stat(memcg);
697
698 if (newly_dirty)
699 __mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
700
701 return newly_dirty;
702}
703EXPORT_SYMBOL(__set_page_dirty_buffers);

/*
 * Write out and wait upon a list of buffers.
 *
 * We have conflicting pressures: we want to make sure that all
 * initially dirty buffers get waited on, but that any subsequently
 * dirtied buffers don't.  After all, we don't want fsync to last
 * forever if somebody is actively writing to the file.
 *
 * Do this in two main stages: first we copy dirty buffers to a
 * temporary inode list, queueing the writes as we go.  Then we clean
 * up, waiting for those writes to complete.
 *
 * During this second stage, any subsequent updates to the file may end
 * up refiling the buffer on the original inode's dirty list again, so
 * there is a chance we will end up with a buffer queued for write but
 * not yet completed on that list.  So, as a final cleanup we go through
 * the osync code to catch these locked, dirty buffers without requeuing
 * any newly dirty buffers for write.
 */
724static int fsync_buffers_list(spinlock_t *lock, struct list_head *list)
725{
726 struct buffer_head *bh;
727 struct list_head tmp;
728 struct address_space *mapping;
729 int err = 0, err2;
730 struct blk_plug plug;
731
732 INIT_LIST_HEAD(&tmp);
733 blk_start_plug(&plug);
734
735 spin_lock(lock);
736 while (!list_empty(list)) {
737 bh = BH_ENTRY(list->next);
738 mapping = bh->b_assoc_map;
739 __remove_assoc_queue(bh);
740
741
742 smp_mb();
743 if (buffer_dirty(bh) || buffer_locked(bh)) {
744 list_add(&bh->b_assoc_buffers, &tmp);
745 bh->b_assoc_map = mapping;
746 if (buffer_dirty(bh)) {
747 get_bh(bh);
748 spin_unlock(lock);
749
750
751
752
753
754
755
756 write_dirty_buffer(bh, WRITE_SYNC);
757
758
759
760
761
762
763
764 brelse(bh);
765 spin_lock(lock);
766 }
767 }
768 }
769
770 spin_unlock(lock);
771 blk_finish_plug(&plug);
772 spin_lock(lock);
773
774 while (!list_empty(&tmp)) {
775 bh = BH_ENTRY(tmp.prev);
776 get_bh(bh);
777 mapping = bh->b_assoc_map;
778 __remove_assoc_queue(bh);
779
780
781 smp_mb();
782 if (buffer_dirty(bh)) {
783 list_add(&bh->b_assoc_buffers,
784 &mapping->private_list);
785 bh->b_assoc_map = mapping;
786 }
787 spin_unlock(lock);
788 wait_on_buffer(bh);
789 if (!buffer_uptodate(bh))
790 err = -EIO;
791 brelse(bh);
792 spin_lock(lock);
793 }
794
795 spin_unlock(lock);
796 err2 = osync_buffers_list(lock, list);
797 if (err)
798 return err;
799 else
800 return err2;
801}

/*
 * Invalidate any and all dirty buffers on a given inode.  We are
 * probably unmounting the fs, but that doesn't mean the inode owns all the
 * buffers on it... because they may be owned by the blockdev, and it's
 * an inode on the blockdev... anyway, we just have to get rid of them.
 *
 * NOTE: we take the inode's blockdev's mapping's private_lock.  Which
 * assumes that all the buffers are against the blockdev.
 */
812void invalidate_inode_buffers(struct inode *inode)
813{
814 if (inode_has_buffers(inode)) {
815 struct address_space *mapping = &inode->i_data;
816 struct list_head *list = &mapping->private_list;
817 struct address_space *buffer_mapping = mapping->private_data;
818
819 spin_lock(&buffer_mapping->private_lock);
820 while (!list_empty(list))
821 __remove_assoc_queue(BH_ENTRY(list->next));
822 spin_unlock(&buffer_mapping->private_lock);
823 }
824}
825EXPORT_SYMBOL(invalidate_inode_buffers);

/*
 * Remove any clean buffers from the inode's buffer list.  This is called
 * when we're trying to free the inode itself.  Those buffers can pin it.
 *
 * Returns true if all buffers were removed.
 */
833int remove_inode_buffers(struct inode *inode)
834{
835 int ret = 1;
836
837 if (inode_has_buffers(inode)) {
838 struct address_space *mapping = &inode->i_data;
839 struct list_head *list = &mapping->private_list;
840 struct address_space *buffer_mapping = mapping->private_data;
841
842 spin_lock(&buffer_mapping->private_lock);
843 while (!list_empty(list)) {
844 struct buffer_head *bh = BH_ENTRY(list->next);
845 if (buffer_dirty(bh)) {
846 ret = 0;
847 break;
848 }
849 __remove_assoc_queue(bh);
850 }
851 spin_unlock(&buffer_mapping->private_lock);
852 }
853 return ret;
854}

/*
 * Create the appropriate buffers when given a page for data area and
 * the size of each buffer.  Use the bh->b_this_page linked list to
 * follow the buffers created.  Return NULL if unable to create more
 * buffers.
 *
 * The retry flag is used to differentiate async IO (paging, swapping)
 * which may not fail from ordinary buffer allocations.
 */
865struct buffer_head *alloc_page_buffers(struct page *page, unsigned long size,
866 int retry)
867{
868 struct buffer_head *bh, *head;
869 long offset;
870
871try_again:
872 head = NULL;
873 offset = PAGE_SIZE;
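	/*
	 * Allocate buffer_heads from the end of the page towards offset 0,
	 * chaining them through b_this_page; 'head' ends up at offset 0.
	 */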
874 while ((offset -= size) >= 0) {
875 bh = alloc_buffer_head(GFP_NOFS);
876 if (!bh)
877 goto no_grow;
878
879 bh->b_this_page = head;
880 bh->b_blocknr = -1;
881 head = bh;
882
883 bh->b_size = size;

		/* Link the buffer to its page */
886 set_bh_page(bh, page, offset);
887 }
888 return head;

/*
 * In case anything failed, we just free everything we got.
 */
892no_grow:
893 if (head) {
894 do {
895 bh = head;
896 head = head->b_this_page;
897 free_buffer_head(bh);
898 } while (head);
899 }

	/*
	 * Return failure for non-async IO requests.  Async IO requests
	 * are not allowed to fail, so we have to wait until buffer heads
	 * become available.  But we don't want tasks sleeping with
	 * partially complete buffers, so all were released above.
	 */
907 if (!retry)
908 return NULL;

	/* We're _really_ low on memory. Now we just
	 * wait for old buffer heads to become free due to
	 * finishing IO.  Since this is an async request and
	 * the reserve list is empty, we're sure there are
	 * async buffer heads in use.
	 */
916 free_more_memory();
917 goto try_again;
918}
919EXPORT_SYMBOL_GPL(alloc_page_buffers);
920
921static inline void
922link_dev_buffers(struct page *page, struct buffer_head *head)
923{
924 struct buffer_head *bh, *tail;
925
926 bh = head;
927 do {
928 tail = bh;
929 bh = bh->b_this_page;
930 } while (bh);
931 tail->b_this_page = head;
932 attach_page_buffers(page, head);
933}
934
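/*
 * Returns the number of complete blocks of 'size' bytes that fit within
 * the device, so callers can avoid mapping buffers past the end of device.
 */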
935static sector_t blkdev_max_block(struct block_device *bdev, unsigned int size)
936{
937 sector_t retval = ~((sector_t)0);
938 loff_t sz = i_size_read(bdev->bd_inode);
939
940 if (sz) {
941 unsigned int sizebits = blksize_bits(size);
942 retval = (sz >> sizebits);
943 }
944 return retval;
945}

/*
 * Initialise the state of a blockdev page's buffers.
 */
950static sector_t
951init_page_buffers(struct page *page, struct block_device *bdev,
952 sector_t block, int size)
953{
954 struct buffer_head *head = page_buffers(page);
955 struct buffer_head *bh = head;
956 int uptodate = PageUptodate(page);
957 sector_t end_block = blkdev_max_block(I_BDEV(bdev->bd_inode), size);
958
959 do {
960 if (!buffer_mapped(bh)) {
961 init_buffer(bh, NULL, NULL);
962 bh->b_bdev = bdev;
963 bh->b_blocknr = block;
964 if (uptodate)
965 set_buffer_uptodate(bh);
966 if (block < end_block)
967 set_buffer_mapped(bh);
968 }
969 block++;
970 bh = bh->b_this_page;
971 } while (bh != head);

	/*
	 * Caller needs to validate requested block against end of device.
	 */
976 return end_block;
977}

/*
 * Create the page-cache page that contains the requested block.
 *
 * This is used purely for blockdev mappings.
 */
984static int
985grow_dev_page(struct block_device *bdev, sector_t block,
986 pgoff_t index, int size, int sizebits, gfp_t gfp)
987{
988 struct inode *inode = bdev->bd_inode;
989 struct page *page;
990 struct buffer_head *bh;
991 sector_t end_block;
992 int ret = 0;
993 gfp_t gfp_mask;
994
995 gfp_mask = mapping_gfp_constraint(inode->i_mapping, ~__GFP_FS) | gfp;

	/*
	 * XXX: __getblk_slow() can not really deal with failure and
	 * will endlessly loop on improvised global reclaim.  Prefer
	 * looping in the allocator rather than here, at least that
	 * code knows what it's doing.
	 */
1003 gfp_mask |= __GFP_NOFAIL;
1004
1005 page = find_or_create_page(inode->i_mapping, index, gfp_mask);
1006 if (!page)
1007 return ret;
1008
1009 BUG_ON(!PageLocked(page));
1010
1011 if (page_has_buffers(page)) {
1012 bh = page_buffers(page);
1013 if (bh->b_size == size) {
1014 end_block = init_page_buffers(page, bdev,
1015 (sector_t)index << sizebits,
1016 size);
1017 goto done;
1018 }
1019 if (!try_to_free_buffers(page))
1020 goto failed;
1021 }

	/*
	 * Allocate some buffers for this page
	 */
1026 bh = alloc_page_buffers(page, size, 0);
1027 if (!bh)
1028 goto failed;

	/*
	 * Link the page to the buffers and initialise them.  Take the
	 * lock to be atomic wrt __find_get_block(), which does not
	 * run under the page lock.
	 */
1035 spin_lock(&inode->i_mapping->private_lock);
1036 link_dev_buffers(page, bh);
1037 end_block = init_page_buffers(page, bdev, (sector_t)index << sizebits,
1038 size);
1039 spin_unlock(&inode->i_mapping->private_lock);
1040done:
1041 ret = (block < end_block) ? 1 : -ENXIO;
1042failed:
1043 unlock_page(page);
1044 page_cache_release(page);
1045 return ret;
1046}

/*
 * Create buffers for the requested block device block's page.  If
 * that page was dirty, the buffers are set dirty also.
 */
1052static int
1053grow_buffers(struct block_device *bdev, sector_t block, int size, gfp_t gfp)
1054{
1055 pgoff_t index;
1056 int sizebits;
1057
1058 sizebits = -1;
1059 do {
1060 sizebits++;
1061 } while ((size << sizebits) < PAGE_SIZE);
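	/*
	 * sizebits is log2(blocks per page); shifting a block number right
	 * by it gives the page-cache index that holds that block.
	 */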
1062
1063 index = block >> sizebits;

	/*
	 * Check for a block which wants to lie outside our maximum possible
	 * pagecache index.  (this comparison is done using sector_t types).
	 */
1069 if (unlikely(index != block >> sizebits)) {
1070 printk(KERN_ERR "%s: requested out-of-range block %llu for "
1071 "device %pg\n",
1072 __func__, (unsigned long long)block,
1073 bdev);
1074 return -EIO;
1075 }
1076
1077
1078 return grow_dev_page(bdev, block, index, size, sizebits, gfp);
1079}
1080
1081struct buffer_head *
1082__getblk_slow(struct block_device *bdev, sector_t block,
1083 unsigned size, gfp_t gfp)
1084{
1085
1086 if (unlikely(size & (bdev_logical_block_size(bdev)-1) ||
1087 (size < 512 || size > PAGE_SIZE))) {
1088 printk(KERN_ERR "getblk(): invalid block size %d requested\n",
1089 size);
1090 printk(KERN_ERR "logical block size: %d\n",
1091 bdev_logical_block_size(bdev));
1092
1093 dump_stack();
1094 return NULL;
1095 }
1096
1097 for (;;) {
1098 struct buffer_head *bh;
1099 int ret;
1100
1101 bh = __find_get_block(bdev, block, size);
1102 if (bh)
1103 return bh;
1104
1105 ret = grow_buffers(bdev, block, size, gfp);
1106 if (ret < 0)
1107 return NULL;
1108 if (ret == 0)
1109 free_more_memory();
1110 }
1111}
1112EXPORT_SYMBOL(__getblk_slow);

/*
 * The relationship between dirty buffers and dirty pages:
 *
 * Whenever a page has any dirty buffers, the page's dirty bit is set, and
 * the page is tagged dirty in its radix tree.
 *
 * At all times, the dirtiness of the buffers represents the dirtiness of
 * subsections of the page.  If the page has buffers, the page dirty bit is
 * merely a hint about the true dirty state.
 *
 * When a page is set dirty in its entirety, all its buffers are marked dirty
 * (if the page has buffers).
 *
 * When a buffer is marked dirty, its page is dirtied, but the page's other
 * buffers are not.
 *
 * Also.  When blockdev buffers are explicitly read with bread(), they
 * individually become uptodate.  But their backing page remains not
 * uptodate - even if all of its buffers are uptodate.  A subsequent
 * block_read_full_page() against that page will discover all the uptodate
 * buffers, will set the page uptodate and will perform no I/O.
 */

/**
 * mark_buffer_dirty - mark a buffer_head as needing writeout
 * @bh: the buffer_head to mark dirty
 *
 * mark_buffer_dirty() will set the dirty bit against the buffer, then set its
 * backing page dirty, then tag the page as dirty in its address_space's radix
 * tree and then attach the address_space's inode to its superblock's dirty
 * inode list.
 *
 * mark_buffer_dirty() is atomic.  It takes bh->b_page->mapping->private_lock,
 * mapping->tree_lock and mapping->host->i_lock.
 */
1149void mark_buffer_dirty(struct buffer_head *bh)
1150{
1151 WARN_ON_ONCE(!buffer_uptodate(bh));
1152
1153 trace_block_dirty_buffer(bh);

	/*
	 * Very *carefully* optimize the it-is-already-dirty case.
	 *
	 * Don't let the final "is it dirty" escape to before we
	 * perhaps modified the buffer.
	 */
1161 if (buffer_dirty(bh)) {
1162 smp_mb();
1163 if (buffer_dirty(bh))
1164 return;
1165 }
1166
1167 if (!test_set_buffer_dirty(bh)) {
1168 struct page *page = bh->b_page;
1169 struct address_space *mapping = NULL;
1170 struct mem_cgroup *memcg;
1171
1172 memcg = mem_cgroup_begin_page_stat(page);
1173 if (!TestSetPageDirty(page)) {
1174 mapping = page_mapping(page);
1175 if (mapping)
1176 __set_page_dirty(page, mapping, memcg, 0);
1177 }
1178 mem_cgroup_end_page_stat(memcg);
1179 if (mapping)
1180 __mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
1181 }
1182}
1183EXPORT_SYMBOL(mark_buffer_dirty);

/*
 * Decrement a buffer_head's reference count.  If all buffers against a page
 * have zero reference count, are clean and unlocked, and if the page is clean
 * and unlocked then try_to_free_buffers() may strip the buffers from the page
 * in preparation for freeing it (sometimes, rarely, buffers are removed from
 * a page but it ends up not being freed, and buffers may later be reattached).
 */
1192void __brelse(struct buffer_head * buf)
1193{
1194 if (atomic_read(&buf->b_count)) {
1195 put_bh(buf);
1196 return;
1197 }
1198 WARN(1, KERN_ERR "VFS: brelse: Trying to free free buffer\n");
1199}
1200EXPORT_SYMBOL(__brelse);

/*
 * bforget() is like brelse(), except it discards any
 * potentially dirty data.
 */
1206void __bforget(struct buffer_head *bh)
1207{
1208 clear_buffer_dirty(bh);
1209 if (bh->b_assoc_map) {
1210 struct address_space *buffer_mapping = bh->b_page->mapping;
1211
1212 spin_lock(&buffer_mapping->private_lock);
1213 list_del_init(&bh->b_assoc_buffers);
1214 bh->b_assoc_map = NULL;
1215 spin_unlock(&buffer_mapping->private_lock);
1216 }
1217 __brelse(bh);
1218}
1219EXPORT_SYMBOL(__bforget);
1220
1221static struct buffer_head *__bread_slow(struct buffer_head *bh)
1222{
1223 lock_buffer(bh);
1224 if (buffer_uptodate(bh)) {
1225 unlock_buffer(bh);
1226 return bh;
1227 } else {
1228 get_bh(bh);
1229 bh->b_end_io = end_buffer_read_sync;
1230 submit_bh(READ, bh);
1231 wait_on_buffer(bh);
1232 if (buffer_uptodate(bh))
1233 return bh;
1234 }
1235 brelse(bh);
1236 return NULL;
1237}

/*
 * Per-cpu buffer LRU implementation.  To reduce the cost of __find_get_block().
 * The bhs[] array is sorted - newest buffer is at bhs[0].  Buffers have their
 * refcount elevated by one when they're in an LRU.  A buffer can only appear
 * once in a particular CPU's LRU.  A single buffer can be present in multiple
 * CPU's LRUs at the same time.
 *
 * This is a transparent caching front-end to sb_bread(), sb_getblk() and
 * sb_find_get_block().
 *
 * The LRUs themselves only need locking against invalidate_bh_lrus.  We use
 * a local interrupt disable for that.
 */
1253#define BH_LRU_SIZE 16
1254
1255struct bh_lru {
1256 struct buffer_head *bhs[BH_LRU_SIZE];
1257};
1258
1259static DEFINE_PER_CPU(struct bh_lru, bh_lrus) = {{ NULL }};
1260
1261#ifdef CONFIG_SMP
1262#define bh_lru_lock() local_irq_disable()
1263#define bh_lru_unlock() local_irq_enable()
1264#else
1265#define bh_lru_lock() preempt_disable()
1266#define bh_lru_unlock() preempt_enable()
1267#endif
1268
1269static inline void check_irqs_on(void)
1270{
1271#ifdef irqs_disabled
1272 BUG_ON(irqs_disabled());
1273#endif
1274}
1275

/*
 * The LRU management algorithm is dopey-but-simple.  Sorry.
 */
1279static void bh_lru_install(struct buffer_head *bh)
1280{
1281 struct buffer_head *evictee = NULL;
1282
1283 check_irqs_on();
1284 bh_lru_lock();
1285 if (__this_cpu_read(bh_lrus.bhs[0]) != bh) {
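		/*
		 * bh is not already at the front: rebuild the LRU with bh
		 * first, keep the remaining entries in order, drop any old
		 * copy of bh, and remember whichever entry falls off the
		 * end so its reference can be dropped outside the lock.
		 */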
1286 struct buffer_head *bhs[BH_LRU_SIZE];
1287 int in;
1288 int out = 0;
1289
1290 get_bh(bh);
1291 bhs[out++] = bh;
1292 for (in = 0; in < BH_LRU_SIZE; in++) {
1293 struct buffer_head *bh2 =
1294 __this_cpu_read(bh_lrus.bhs[in]);
1295
1296 if (bh2 == bh) {
1297 __brelse(bh2);
1298 } else {
1299 if (out >= BH_LRU_SIZE) {
1300 BUG_ON(evictee != NULL);
1301 evictee = bh2;
1302 } else {
1303 bhs[out++] = bh2;
1304 }
1305 }
1306 }
1307 while (out < BH_LRU_SIZE)
1308 bhs[out++] = NULL;
1309 memcpy(this_cpu_ptr(&bh_lrus.bhs), bhs, sizeof(bhs));
1310 }
1311 bh_lru_unlock();
1312
1313 if (evictee)
1314 __brelse(evictee);
1315}

/*
 * Look up the bh in this cpu's LRU.  If it's there, move it to the head.
 */
1320static struct buffer_head *
1321lookup_bh_lru(struct block_device *bdev, sector_t block, unsigned size)
1322{
1323 struct buffer_head *ret = NULL;
1324 unsigned int i;
1325
1326 check_irqs_on();
1327 bh_lru_lock();
1328 for (i = 0; i < BH_LRU_SIZE; i++) {
1329 struct buffer_head *bh = __this_cpu_read(bh_lrus.bhs[i]);
1330
1331 if (bh && bh->b_blocknr == block && bh->b_bdev == bdev &&
1332 bh->b_size == size) {
1333 if (i) {
1334 while (i) {
1335 __this_cpu_write(bh_lrus.bhs[i],
1336 __this_cpu_read(bh_lrus.bhs[i - 1]));
1337 i--;
1338 }
1339 __this_cpu_write(bh_lrus.bhs[0], bh);
1340 }
1341 get_bh(bh);
1342 ret = bh;
1343 break;
1344 }
1345 }
1346 bh_lru_unlock();
1347 return ret;
1348}

/*
 * Perform a pagecache lookup for the matching buffer.  If it's there, refresh
 * it in the LRU and mark it as accessed.  If it is not present then return
 * NULL
 */
1355struct buffer_head *
1356__find_get_block(struct block_device *bdev, sector_t block, unsigned size)
1357{
1358 struct buffer_head *bh = lookup_bh_lru(bdev, block, size);
1359
1360 if (bh == NULL) {
1361
1362 bh = __find_get_block_slow(bdev, block);
1363 if (bh)
1364 bh_lru_install(bh);
1365 } else
1366 touch_buffer(bh);
1367
1368 return bh;
1369}
1370EXPORT_SYMBOL(__find_get_block);

/*
 * __getblk_gfp() will locate (and, if necessary, create) the buffer_head
 * which corresponds to the passed block_device, block and size.  The
 * returned buffer has its reference count incremented.
 *
 * __getblk_gfp() will lock up the machine if grow_dev_page's
 * try_to_free_buffers() attempt is failing.  FIXME, perhaps?
 */
1380struct buffer_head *
1381__getblk_gfp(struct block_device *bdev, sector_t block,
1382 unsigned size, gfp_t gfp)
1383{
1384 struct buffer_head *bh = __find_get_block(bdev, block, size);
1385
1386 might_sleep();
1387 if (bh == NULL)
1388 bh = __getblk_slow(bdev, block, size, gfp);
1389 return bh;
1390}
1391EXPORT_SYMBOL(__getblk_gfp);

/*
 * Do async read-ahead on a buffer..
 */
1396void __breadahead(struct block_device *bdev, sector_t block, unsigned size)
1397{
1398 struct buffer_head *bh = __getblk(bdev, block, size);
1399 if (likely(bh)) {
1400 ll_rw_block(READA, 1, &bh);
1401 brelse(bh);
1402 }
1403}
1404EXPORT_SYMBOL(__breadahead);

/**
 *  __bread_gfp() - reads a specified block and returns the bh
 *  @bdev: the block_device to read from
 *  @block: number of block
 *  @size: size (in bytes) to read
 *  @gfp: page allocation flag
 *
 *  Reads a specified block, and returns buffer head that contains it.
 *  The page cache can be allocated from non-movable area
 *  not to prevent page migration if you set gfp to zero.
 *  It returns NULL if the block was unreadable.
 */
1418struct buffer_head *
1419__bread_gfp(struct block_device *bdev, sector_t block,
1420 unsigned size, gfp_t gfp)
1421{
1422 struct buffer_head *bh = __getblk_gfp(bdev, block, size, gfp);
1423
1424 if (likely(bh) && !buffer_uptodate(bh))
1425 bh = __bread_slow(bh);
1426 return bh;
1427}
1428EXPORT_SYMBOL(__bread_gfp);

/*
 * invalidate_bh_lrus() is called rarely - but not only at unmount.
 * This doesn't race because it runs in each cpu either in irq
 * or with preempt disabled.
 */
1435static void invalidate_bh_lru(void *arg)
1436{
1437 struct bh_lru *b = &get_cpu_var(bh_lrus);
1438 int i;
1439
1440 for (i = 0; i < BH_LRU_SIZE; i++) {
1441 brelse(b->bhs[i]);
1442 b->bhs[i] = NULL;
1443 }
1444 put_cpu_var(bh_lrus);
1445}
1446
1447static bool has_bh_in_lru(int cpu, void *dummy)
1448{
1449 struct bh_lru *b = per_cpu_ptr(&bh_lrus, cpu);
1450 int i;
1451
1452 for (i = 0; i < BH_LRU_SIZE; i++) {
1453 if (b->bhs[i])
1454 return 1;
1455 }
1456
1457 return 0;
1458}
1459
1460void invalidate_bh_lrus(void)
1461{
1462 on_each_cpu_cond(has_bh_in_lru, invalidate_bh_lru, NULL, 1, GFP_KERNEL);
1463}
1464EXPORT_SYMBOL_GPL(invalidate_bh_lrus);
1465
1466void set_bh_page(struct buffer_head *bh,
1467 struct page *page, unsigned long offset)
1468{
1469 bh->b_page = page;
1470 BUG_ON(offset >= PAGE_SIZE);
1471 if (PageHighMem(page))
		/*
		 * This catches illegal uses and preserves the offset:
		 */
1475 bh->b_data = (char *)(0 + offset);
1476 else
1477 bh->b_data = page_address(page) + offset;
1478}
1479EXPORT_SYMBOL(set_bh_page);

/*
 * Called when truncating a buffer on a page completely.
 */

/* Bits that are cleared during an invalidate */
1486#define BUFFER_FLAGS_DISCARD \
1487 (1 << BH_Mapped | 1 << BH_New | 1 << BH_Req | \
1488 1 << BH_Delay | 1 << BH_Unwritten)
1489
1490static void discard_buffer(struct buffer_head * bh)
1491{
1492 unsigned long b_state, b_state_old;
1493
1494 lock_buffer(bh);
1495 clear_buffer_dirty(bh);
1496 bh->b_bdev = NULL;
1497 b_state = bh->b_state;
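	/*
	 * Atomically clear the mapped/new/req/delay/unwritten bits; cmpxchg
	 * is used because other b_state bits may be updated concurrently.
	 */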
1498 for (;;) {
1499 b_state_old = cmpxchg(&bh->b_state, b_state,
1500 (b_state & ~BUFFER_FLAGS_DISCARD));
1501 if (b_state_old == b_state)
1502 break;
1503 b_state = b_state_old;
1504 }
1505 unlock_buffer(bh);
1506}

/**
 * block_invalidatepage - invalidate part or all of a buffer-backed page
 * @page: the page which is affected
 * @offset: start of the range to invalidate
 * @length: length of the range to invalidate
 *
 * block_invalidatepage() is called when all or part of the page has become
 * invalidated by a truncate operation.
 *
 * block_invalidatepage() does not have to release all buffers, but it must
 * ensure that no dirty buffer is left outside @offset and that no I/O
 * is underway against any of the blocks which are outside the truncation
 * point.  Because the caller is about to free (and possibly reuse) those
 * blocks on-disk.
 */
1524void block_invalidatepage(struct page *page, unsigned int offset,
1525 unsigned int length)
1526{
1527 struct buffer_head *head, *bh, *next;
1528 unsigned int curr_off = 0;
1529 unsigned int stop = length + offset;
1530
1531 BUG_ON(!PageLocked(page));
1532 if (!page_has_buffers(page))
1533 goto out;
1534
1535
1536
1537
1538 BUG_ON(stop > PAGE_CACHE_SIZE || stop < length);
1539
1540 head = page_buffers(page);
1541 bh = head;
1542 do {
1543 unsigned int next_off = curr_off + bh->b_size;
1544 next = bh->b_this_page;
1545
1546
1547
1548
1549 if (next_off > stop)
1550 goto out;
1551
1552
1553
1554
1555 if (offset <= curr_off)
1556 discard_buffer(bh);
1557 curr_off = next_off;
1558 bh = next;
1559 } while (bh != head);

	/*
	 * We release buffers only if the entire page is being invalidated.
	 * The get_block cached value has been unconditionally invalidated,
	 * so real IO is not possible anymore.
	 */
1566 if (offset == 0)
1567 try_to_release_page(page, 0);
1568out:
1569 return;
1570}
1571EXPORT_SYMBOL(block_invalidatepage);

/*
 * We attach and possibly dirty the buffers atomically wrt
 * __set_page_dirty_buffers() via private_lock.  try_to_free_buffers
 * is already excluded via the page lock.
 */
1579void create_empty_buffers(struct page *page,
1580 unsigned long blocksize, unsigned long b_state)
1581{
1582 struct buffer_head *bh, *head, *tail;
1583
1584 head = alloc_page_buffers(page, blocksize, 1);
1585 bh = head;
1586 do {
1587 bh->b_state |= b_state;
1588 tail = bh;
1589 bh = bh->b_this_page;
1590 } while (bh);
1591 tail->b_this_page = head;
1592
1593 spin_lock(&page->mapping->private_lock);
1594 if (PageUptodate(page) || PageDirty(page)) {
1595 bh = head;
1596 do {
1597 if (PageDirty(page))
1598 set_buffer_dirty(bh);
1599 if (PageUptodate(page))
1600 set_buffer_uptodate(bh);
1601 bh = bh->b_this_page;
1602 } while (bh != head);
1603 }
1604 attach_page_buffers(page, head);
1605 spin_unlock(&page->mapping->private_lock);
1606}
1607EXPORT_SYMBOL(create_empty_buffers);

/*
 * We are taking a block for data and we don't want any output from any
 * buffer-cache aliases starting from return from that function and
 * until the moment when something will explicitly mark the buffer
 * dirty (hopefully that will not happen until we will free that block ;-)
 * We don't even need to mark it not-uptodate - nobody can expect
 * anything from a newly allocated buffer anyway.  We used to use
 * unmap_buffer() for such invalidation, but that was wrong.  We definitely
 * don't want to mark the alias unmapped, for example - it would confuse
 * anyone who might pick it with bread() afterwards...
 *
 * Also..  Note that bforget() doesn't lock the buffer.  So there can
 * be writeout I/O going on against recently-freed buffers.  We don't
 * wait on that I/O in bforget() - it's more efficient to wait on the I/O
 * only if we really need to.  That happens here.
 */
1625void unmap_underlying_metadata(struct block_device *bdev, sector_t block)
1626{
1627 struct buffer_head *old_bh;
1628
1629 might_sleep();
1630
1631 old_bh = __find_get_block_slow(bdev, block);
1632 if (old_bh) {
1633 clear_buffer_dirty(old_bh);
1634 wait_on_buffer(old_bh);
1635 clear_buffer_req(old_bh);
1636 __brelse(old_bh);
1637 }
1638}
1639EXPORT_SYMBOL(unmap_underlying_metadata);

/*
 * Size is a power-of-two in the range 512..PAGE_SIZE,
 * and the case we care about most is PAGE_SIZE.
 *
 * So this *could* possibly be written with those
 * constraints in mind (relevant mostly if some
 * architecture has a slow bit-scan instruction)
 */
1649static inline int block_size_bits(unsigned int blocksize)
1650{
1651 return ilog2(blocksize);
1652}
1653
1654static struct buffer_head *create_page_buffers(struct page *page, struct inode *inode, unsigned int b_state)
1655{
1656 BUG_ON(!PageLocked(page));
1657
1658 if (!page_has_buffers(page))
1659 create_empty_buffers(page, 1 << ACCESS_ONCE(inode->i_blkbits), b_state);
1660 return page_buffers(page);
1661}

/*
 * NOTE! All mapped/uptodate combinations are valid:
 *
 *	Mapped	Uptodate	Meaning
 *
 *	No	No		"unknown" - must do get_block()
 *	No	Yes		"hole" - zero-filled
 *	Yes	No		"allocated" - allocated on disk, not read in
 *	Yes	Yes		"valid" - allocated and up-to-date in memory.
 *
 * "Dirty" is valid only with the last case (mapped+uptodate).
 */

/*
 * While block_write_full_page is writing back the dirty buffers under
 * the page lock, whoever dirtied the buffers may decide to clean them
 * again at any time.  We handle that by only looking at the buffer
 * state inside lock_buffer().
 *
 * If block_write_full_page() is called for regular writeback
 * (wbc->sync_mode == WB_SYNC_NONE) then it will redirty a page which has a
 * locked buffer.  This only can happen if someone has written the buffer
 * directly, with submit_bh().  At the address_space level PageWriteback
 * prevents this contention from occurring.
 *
 * If block_write_full_page() is called with wbc->sync_mode ==
 * WB_SYNC_ALL, the writes are posted using WRITE_SYNC; this
 * causes the writes to be flagged as synchronous writes.
 */
1692static int __block_write_full_page(struct inode *inode, struct page *page,
1693 get_block_t *get_block, struct writeback_control *wbc,
1694 bh_end_io_t *handler)
1695{
1696 int err;
1697 sector_t block;
1698 sector_t last_block;
1699 struct buffer_head *bh, *head;
1700 unsigned int blocksize, bbits;
1701 int nr_underway = 0;
1702 int write_op = (wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE);
1703
1704 head = create_page_buffers(page, inode,
1705 (1 << BH_Dirty)|(1 << BH_Uptodate));

	/*
	 * Be very careful.  We have no exclusion from __set_page_dirty_buffers
	 * here, and the (potentially unmapped) buffers may become dirty at
	 * any time.  If a buffer becomes dirty here after we've inspected it
	 * then we just miss that fact, and the page stays dirty.
	 *
	 * Buffers outside i_size may be dirtied by __set_page_dirty_buffers;
	 * handle that here by just cleaning them.
	 */
1717 bh = head;
1718 blocksize = bh->b_size;
1719 bbits = block_size_bits(blocksize);
1720
1721 block = (sector_t)page->index << (PAGE_CACHE_SHIFT - bbits);
1722 last_block = (i_size_read(inode) - 1) >> bbits;

	/*
	 * Get all the dirty buffers mapped to disk addresses and
	 * handle any aliases from the underlying blockdev's mapping.
	 */
1728 do {
1729 if (block > last_block) {

			/*
			 * mapped buffers outside i_size will occur, because
			 * this page can be outside i_size when there is a
			 * truncate in progress.
			 */
			/*
			 * The buffer was zeroed by block_write_full_page()
			 */
1738 clear_buffer_dirty(bh);
1739 set_buffer_uptodate(bh);
1740 } else if ((!buffer_mapped(bh) || buffer_delay(bh)) &&
1741 buffer_dirty(bh)) {
1742 WARN_ON(bh->b_size != blocksize);
1743 err = get_block(inode, block, bh, 1);
1744 if (err)
1745 goto recover;
1746 clear_buffer_delay(bh);
1747 if (buffer_new(bh)) {
1748
1749 clear_buffer_new(bh);
1750 unmap_underlying_metadata(bh->b_bdev,
1751 bh->b_blocknr);
1752 }
1753 }
1754 bh = bh->b_this_page;
1755 block++;
1756 } while (bh != head);
1757
1758 do {
1759 if (!buffer_mapped(bh))
1760 continue;

		/*
		 * If it's a fully non-blocking write attempt and we cannot
		 * lock the buffer then redirty the page.  Note that this can
		 * potentially cause a busy-wait loop from writeback threads
		 * and kswapd activity, but those code paths have their own
		 * higher-level throttling.
		 */
1768 if (wbc->sync_mode != WB_SYNC_NONE) {
1769 lock_buffer(bh);
1770 } else if (!trylock_buffer(bh)) {
1771 redirty_page_for_writepage(wbc, page);
1772 continue;
1773 }
1774 if (test_clear_buffer_dirty(bh)) {
1775 mark_buffer_async_write_endio(bh, handler);
1776 } else {
1777 unlock_buffer(bh);
1778 }
1779 } while ((bh = bh->b_this_page) != head);

	/*
	 * The page and its buffers are protected by PageWriteback(), so we can
	 * drop the bh refcounts early.
	 */
1785 BUG_ON(PageWriteback(page));
1786 set_page_writeback(page);
1787
1788 do {
1789 struct buffer_head *next = bh->b_this_page;
1790 if (buffer_async_write(bh)) {
1791 submit_bh_wbc(write_op, bh, 0, wbc);
1792 nr_underway++;
1793 }
1794 bh = next;
1795 } while (bh != head);
1796 unlock_page(page);
1797
1798 err = 0;
1799done:
1800 if (nr_underway == 0) {
		/*
		 * The page was marked dirty, but the buffers were clean.
		 * Someone wrote them back by hand with
		 * ll_rw_block/submit_bh.  A rare case.
		 */
1806 end_page_writeback(page);

		/*
		 * The page and buffer_heads can be released at any time from
		 * here on.
		 */
1812 }
1813 return err;
1814
1815recover:
	/*
	 * ENOSPC, or some other error.  We may already have added some
	 * blocks to the file, so we need to write these out to avoid
	 * exposing stale data.
	 * The page is currently locked and not marked for writeback.
	 */
1822 bh = head;
1823
1824 do {
1825 if (buffer_mapped(bh) && buffer_dirty(bh) &&
1826 !buffer_delay(bh)) {
1827 lock_buffer(bh);
1828 mark_buffer_async_write_endio(bh, handler);
1829 } else {
1830
1831
1832
1833
1834 clear_buffer_dirty(bh);
1835 }
1836 } while ((bh = bh->b_this_page) != head);
1837 SetPageError(page);
1838 BUG_ON(PageWriteback(page));
1839 mapping_set_error(page->mapping, err);
1840 set_page_writeback(page);
1841 do {
1842 struct buffer_head *next = bh->b_this_page;
1843 if (buffer_async_write(bh)) {
1844 clear_buffer_dirty(bh);
1845 submit_bh_wbc(write_op, bh, 0, wbc);
1846 nr_underway++;
1847 }
1848 bh = next;
1849 } while (bh != head);
1850 unlock_page(page);
1851 goto done;
1852}

/*
 * If a page has any new buffers, zero them out here, and mark them uptodate
 * and dirty so they'll be written out (in order to prevent uninitialised
 * block data from leaking). And clear the new bit.
 */
1859void page_zero_new_buffers(struct page *page, unsigned from, unsigned to)
1860{
1861 unsigned int block_start, block_end;
1862 struct buffer_head *head, *bh;
1863
1864 BUG_ON(!PageLocked(page));
1865 if (!page_has_buffers(page))
1866 return;
1867
1868 bh = head = page_buffers(page);
1869 block_start = 0;
1870 do {
1871 block_end = block_start + bh->b_size;
1872
1873 if (buffer_new(bh)) {
1874 if (block_end > from && block_start < to) {
1875 if (!PageUptodate(page)) {
1876 unsigned start, size;
1877
1878 start = max(from, block_start);
1879 size = min(to, block_end) - start;
1880
1881 zero_user(page, start, size);
1882 set_buffer_uptodate(bh);
1883 }
1884
1885 clear_buffer_new(bh);
1886 mark_buffer_dirty(bh);
1887 }
1888 }
1889
1890 block_start = block_end;
1891 bh = bh->b_this_page;
1892 } while (bh != head);
1893}
1894EXPORT_SYMBOL(page_zero_new_buffers);
1895
1896int __block_write_begin(struct page *page, loff_t pos, unsigned len,
1897 get_block_t *get_block)
1898{
1899 unsigned from = pos & (PAGE_CACHE_SIZE - 1);
1900 unsigned to = from + len;
1901 struct inode *inode = page->mapping->host;
1902 unsigned block_start, block_end;
1903 sector_t block;
1904 int err = 0;
1905 unsigned blocksize, bbits;
1906 struct buffer_head *bh, *head, *wait[2], **wait_bh=wait;
1907
1908 BUG_ON(!PageLocked(page));
1909 BUG_ON(from > PAGE_CACHE_SIZE);
1910 BUG_ON(to > PAGE_CACHE_SIZE);
1911 BUG_ON(from > to);
1912
1913 head = create_page_buffers(page, inode, 0);
1914 blocksize = head->b_size;
1915 bbits = block_size_bits(blocksize);
1916
1917 block = (sector_t)page->index << (PAGE_CACHE_SHIFT - bbits);
1918
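	/*
	 * Walk every buffer in the page exactly once; the "!block_start"
	 * term lets the first iteration run even though bh starts at head.
	 */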
1919 for(bh = head, block_start = 0; bh != head || !block_start;
1920 block++, block_start=block_end, bh = bh->b_this_page) {
1921 block_end = block_start + blocksize;
1922 if (block_end <= from || block_start >= to) {
1923 if (PageUptodate(page)) {
1924 if (!buffer_uptodate(bh))
1925 set_buffer_uptodate(bh);
1926 }
1927 continue;
1928 }
1929 if (buffer_new(bh))
1930 clear_buffer_new(bh);
1931 if (!buffer_mapped(bh)) {
1932 WARN_ON(bh->b_size != blocksize);
1933 err = get_block(inode, block, bh, 1);
1934 if (err)
1935 break;
1936 if (buffer_new(bh)) {
1937 unmap_underlying_metadata(bh->b_bdev,
1938 bh->b_blocknr);
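				/*
				 * Freshly allocated block: any stale alias in
				 * the blockdev mapping was discarded above.
				 * If the page is uptodate the buffer contents
				 * are valid; otherwise zero the parts of the
				 * block outside the write range so no stale
				 * data is exposed.
				 */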
1939 if (PageUptodate(page)) {
1940 clear_buffer_new(bh);
1941 set_buffer_uptodate(bh);
1942 mark_buffer_dirty(bh);
1943 continue;
1944 }
1945 if (block_end > to || block_start < from)
1946 zero_user_segments(page,
1947 to, block_end,
1948 block_start, from);
1949 continue;
1950 }
1951 }
1952 if (PageUptodate(page)) {
1953 if (!buffer_uptodate(bh))
1954 set_buffer_uptodate(bh);
1955 continue;
1956 }
1957 if (!buffer_uptodate(bh) && !buffer_delay(bh) &&
1958 !buffer_unwritten(bh) &&
1959 (block_start < from || block_end > to)) {
1960 ll_rw_block(READ, 1, &bh);
1961 *wait_bh++=bh;
1962 }
1963 }
1964
1965
1966
1967 while(wait_bh > wait) {
1968 wait_on_buffer(*--wait_bh);
1969 if (!buffer_uptodate(*wait_bh))
1970 err = -EIO;
1971 }
1972 if (unlikely(err))
1973 page_zero_new_buffers(page, from, to);
1974 return err;
1975}
1976EXPORT_SYMBOL(__block_write_begin);
1977
1978static int __block_commit_write(struct inode *inode, struct page *page,
1979 unsigned from, unsigned to)
1980{
1981 unsigned block_start, block_end;
1982 int partial = 0;
1983 unsigned blocksize;
1984 struct buffer_head *bh, *head;
1985
1986 bh = head = page_buffers(page);
1987 blocksize = bh->b_size;
1988
1989 block_start = 0;
1990 do {
1991 block_end = block_start + blocksize;
1992 if (block_end <= from || block_start >= to) {
1993 if (!buffer_uptodate(bh))
1994 partial = 1;
1995 } else {
1996 set_buffer_uptodate(bh);
1997 mark_buffer_dirty(bh);
1998 }
1999 clear_buffer_new(bh);
2000
2001 block_start = block_end;
2002 bh = bh->b_this_page;
2003 } while (bh != head);

	/*
	 * If this is a partial write which happened to make all buffers
	 * uptodate then we can optimize away a bogus readpage() for
	 * the next read(). Here we 'discover' whether the page went
	 * uptodate as a result of this (potentially partial) write.
	 */
2011 if (!partial)
2012 SetPageUptodate(page);
2013 return 0;
2014}

/*
 * block_write_begin takes care of the basic task of block allocation and
 * bringing partial write blocks uptodate first.
 *
 * The filesystem needs to handle block truncation upon failure.
 */
2022int block_write_begin(struct address_space *mapping, loff_t pos, unsigned len,
2023 unsigned flags, struct page **pagep, get_block_t *get_block)
2024{
2025 pgoff_t index = pos >> PAGE_CACHE_SHIFT;
2026 struct page *page;
2027 int status;
2028
2029 page = grab_cache_page_write_begin(mapping, index, flags);
2030 if (!page)
2031 return -ENOMEM;
2032
2033 status = __block_write_begin(page, pos, len, get_block);
2034 if (unlikely(status)) {
2035 unlock_page(page);
2036 page_cache_release(page);
2037 page = NULL;
2038 }
2039
2040 *pagep = page;
2041 return status;
2042}
2043EXPORT_SYMBOL(block_write_begin);
2044
2045int block_write_end(struct file *file, struct address_space *mapping,
2046 loff_t pos, unsigned len, unsigned copied,
2047 struct page *page, void *fsdata)
2048{
2049 struct inode *inode = mapping->host;
2050 unsigned start;
2051
2052 start = pos & (PAGE_CACHE_SIZE - 1);
2053
2054 if (unlikely(copied < len)) {
		/*
		 * The buffers that were written will now be uptodate, so we
		 * don't have to worry about a readpage reading them and
		 * overwriting a partial write.  However if we have encountered
		 * a short write and only partially written into a buffer, it
		 * will not be marked uptodate, so a readpage might come in and
		 * destroy our partial write.
		 *
		 * Do the simplest thing, and just treat any short write to a
		 * non uptodate page as a zero-length write, and force the
		 * caller to redo the whole thing.
		 */
2067 if (!PageUptodate(page))
2068 copied = 0;
2069
2070 page_zero_new_buffers(page, start+copied, start+len);
2071 }
2072 flush_dcache_page(page);
2073
2074
2075 __block_commit_write(inode, page, start, start+copied);
2076
2077 return copied;
2078}
2079EXPORT_SYMBOL(block_write_end);
2080
2081int generic_write_end(struct file *file, struct address_space *mapping,
2082 loff_t pos, unsigned len, unsigned copied,
2083 struct page *page, void *fsdata)
2084{
2085 struct inode *inode = mapping->host;
2086 loff_t old_size = inode->i_size;
2087 int i_size_changed = 0;
2088
2089 copied = block_write_end(file, mapping, pos, len, copied, page, fsdata);

	/*
	 * No need to use i_size_read() here, the i_size
	 * cannot change under us because we hold i_mutex.
	 *
	 * But it's important to update i_size while still holding page lock:
	 * page writeout could otherwise come in and zero beyond i_size.
	 */
2098 if (pos+copied > inode->i_size) {
2099 i_size_write(inode, pos+copied);
2100 i_size_changed = 1;
2101 }
2102
2103 unlock_page(page);
2104 page_cache_release(page);
2105
2106 if (old_size < pos)
2107 pagecache_isize_extended(inode, old_size, pos);

	/*
	 * Don't mark the inode dirty under page lock. First, it unnecessarily
	 * makes the holding time of page lock longer. Second, it forces lock
	 * ordering of page lock and transaction start for journaling
	 * filesystems.
	 */
2114 if (i_size_changed)
2115 mark_inode_dirty(inode);
2116
2117 return copied;
2118}
2119EXPORT_SYMBOL(generic_write_end);

/*
 * block_is_partially_uptodate checks whether buffers within a page are
 * uptodate or not.
 *
 * Returns true if all buffers which correspond to a file portion
 * we want to read are uptodate.
 */
2128int block_is_partially_uptodate(struct page *page, unsigned long from,
2129 unsigned long count)
2130{
2131 unsigned block_start, block_end, blocksize;
2132 unsigned to;
2133 struct buffer_head *bh, *head;
2134 int ret = 1;
2135
2136 if (!page_has_buffers(page))
2137 return 0;
2138
2139 head = page_buffers(page);
2140 blocksize = head->b_size;
2141 to = min_t(unsigned, PAGE_CACHE_SIZE - from, count);
2142 to = from + to;
2143 if (from < blocksize && to > PAGE_CACHE_SIZE - blocksize)
2144 return 0;
2145
2146 bh = head;
2147 block_start = 0;
2148 do {
2149 block_end = block_start + blocksize;
2150 if (block_end > from && block_start < to) {
2151 if (!buffer_uptodate(bh)) {
2152 ret = 0;
2153 break;
2154 }
2155 if (block_end >= to)
2156 break;
2157 }
2158 block_start = block_end;
2159 bh = bh->b_this_page;
2160 } while (bh != head);
2161
2162 return ret;
2163}
2164EXPORT_SYMBOL(block_is_partially_uptodate);

/*
 * Generic "read page" function for block devices that have the normal
 * get_block functionality.  This is most of the block device filesystems.
 * Reads the page asynchronously --- the unlock_buffer() and
 * set/clear_buffer_uptodate() functions propagate buffer state into the
 * page struct once IO has completed.
 */
2173int block_read_full_page(struct page *page, get_block_t *get_block)
2174{
2175 struct inode *inode = page->mapping->host;
2176 sector_t iblock, lblock;
2177 struct buffer_head *bh, *head, *arr[MAX_BUF_PER_PAGE];
2178 unsigned int blocksize, bbits;
2179 int nr, i;
2180 int fully_mapped = 1;
2181
2182 head = create_page_buffers(page, inode, 0);
2183 blocksize = head->b_size;
2184 bbits = block_size_bits(blocksize);
2185
2186 iblock = (sector_t)page->index << (PAGE_CACHE_SHIFT - bbits);
2187 lblock = (i_size_read(inode)+blocksize-1) >> bbits;
2188 bh = head;
2189 nr = 0;
2190 i = 0;
2191
2192 do {
2193 if (buffer_uptodate(bh))
2194 continue;
2195
2196 if (!buffer_mapped(bh)) {
2197 int err = 0;
2198
2199 fully_mapped = 0;
2200 if (iblock < lblock) {
2201 WARN_ON(bh->b_size != blocksize);
2202 err = get_block(inode, iblock, bh, 0);
2203 if (err)
2204 SetPageError(page);
2205 }
2206 if (!buffer_mapped(bh)) {
2207 zero_user(page, i * blocksize, blocksize);
2208 if (!err)
2209 set_buffer_uptodate(bh);
2210 continue;
2211 }
2212
2213
2214
2215
2216 if (buffer_uptodate(bh))
2217 continue;
2218 }
2219 arr[nr++] = bh;
2220 } while (i++, iblock++, (bh = bh->b_this_page) != head);
2221
2222 if (fully_mapped)
2223 SetPageMappedToDisk(page);
2224
2225 if (!nr) {
2226
2227
2228
2229
2230 if (!PageError(page))
2231 SetPageUptodate(page);
2232 unlock_page(page);
2233 return 0;
2234 }
2235
2236
2237 for (i = 0; i < nr; i++) {
2238 bh = arr[i];
2239 lock_buffer(bh);
2240 mark_buffer_async_read(bh);
2241 }

	/*
	 * Stage 3: start the IO.  Check for uptodateness
	 * inside the buffer lock in case another process reading
	 * the underlying blockdev brought it uptodate (the sct fix).
	 */
2248 for (i = 0; i < nr; i++) {
2249 bh = arr[i];
2250 if (buffer_uptodate(bh))
2251 end_buffer_async_read(bh, 1);
2252 else
2253 submit_bh(READ, bh);
2254 }
2255 return 0;
2256}
2257EXPORT_SYMBOL(block_read_full_page);

/* utility function for filesystems that need to do work on expanding
 * truncates.  Uses filesystem pagecache writes to allow the filesystem to
 * deal with the hole.
 */
2263int generic_cont_expand_simple(struct inode *inode, loff_t size)
2264{
2265 struct address_space *mapping = inode->i_mapping;
2266 struct page *page;
2267 void *fsdata;
2268 int err;
2269
2270 err = inode_newsize_ok(inode, size);
2271 if (err)
2272 goto out;
2273
2274 err = pagecache_write_begin(NULL, mapping, size, 0,
2275 AOP_FLAG_UNINTERRUPTIBLE|AOP_FLAG_CONT_EXPAND,
2276 &page, &fsdata);
2277 if (err)
2278 goto out;
2279
2280 err = pagecache_write_end(NULL, mapping, size, 0, 0, page, fsdata);
2281 BUG_ON(err > 0);
2282
2283out:
2284 return err;
2285}
2286EXPORT_SYMBOL(generic_cont_expand_simple);
2287
2288static int cont_expand_zero(struct file *file, struct address_space *mapping,
2289 loff_t pos, loff_t *bytes)
2290{
2291 struct inode *inode = mapping->host;
2292 unsigned blocksize = 1 << inode->i_blkbits;
2293 struct page *page;
2294 void *fsdata;
2295 pgoff_t index, curidx;
2296 loff_t curpos;
2297 unsigned zerofrom, offset, len;
2298 int err = 0;
2299
2300 index = pos >> PAGE_CACHE_SHIFT;
2301 offset = pos & ~PAGE_CACHE_MASK;
2302
2303 while (index > (curidx = (curpos = *bytes)>>PAGE_CACHE_SHIFT)) {
2304 zerofrom = curpos & ~PAGE_CACHE_MASK;
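		/*
		 * If the zeroed-out extent recorded in *bytes ends part way
		 * into a block, round it up to the next block boundary; the
		 * tail of that block is zeroed below along with the rest of
		 * this page.
		 */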
2305 if (zerofrom & (blocksize-1)) {
2306 *bytes |= (blocksize-1);
2307 (*bytes)++;
2308 }
2309 len = PAGE_CACHE_SIZE - zerofrom;
2310
2311 err = pagecache_write_begin(file, mapping, curpos, len,
2312 AOP_FLAG_UNINTERRUPTIBLE,
2313 &page, &fsdata);
2314 if (err)
2315 goto out;
2316 zero_user(page, zerofrom, len);
2317 err = pagecache_write_end(file, mapping, curpos, len, len,
2318 page, fsdata);
2319 if (err < 0)
2320 goto out;
2321 BUG_ON(err != len);
2322 err = 0;
2323
2324 balance_dirty_pages_ratelimited(mapping);
2325
2326 if (unlikely(fatal_signal_pending(current))) {
2327 err = -EINTR;
2328 goto out;
2329 }
2330 }
2331
2332
2333 if (index == curidx) {
2334 zerofrom = curpos & ~PAGE_CACHE_MASK;
2335
2336 if (offset <= zerofrom) {
2337 goto out;
2338 }
2339 if (zerofrom & (blocksize-1)) {
2340 *bytes |= (blocksize-1);
2341 (*bytes)++;
2342 }
2343 len = offset - zerofrom;
2344
2345 err = pagecache_write_begin(file, mapping, curpos, len,
2346 AOP_FLAG_UNINTERRUPTIBLE,
2347 &page, &fsdata);
2348 if (err)
2349 goto out;
2350 zero_user(page, zerofrom, len);
2351 err = pagecache_write_end(file, mapping, curpos, len, len,
2352 page, fsdata);
2353 if (err < 0)
2354 goto out;
2355 BUG_ON(err != len);
2356 err = 0;
2357 }
2358out:
2359 return err;
2360}

/*
 * For moronic filesystems that do not allow holes in file.
 * We may have to extend the file.
 */
2366int cont_write_begin(struct file *file, struct address_space *mapping,
2367 loff_t pos, unsigned len, unsigned flags,
2368 struct page **pagep, void **fsdata,
2369 get_block_t *get_block, loff_t *bytes)
2370{
2371 struct inode *inode = mapping->host;
2372 unsigned blocksize = 1 << inode->i_blkbits;
2373 unsigned zerofrom;
2374 int err;
2375
2376 err = cont_expand_zero(file, mapping, pos, bytes);
2377 if (err)
2378 return err;
2379
2380 zerofrom = *bytes & ~PAGE_CACHE_MASK;
2381 if (pos+len > *bytes && zerofrom & (blocksize-1)) {
2382 *bytes |= (blocksize-1);
2383 (*bytes)++;
2384 }
2385
2386 return block_write_begin(mapping, pos, len, flags, pagep, get_block);
2387}
2388EXPORT_SYMBOL(cont_write_begin);
2389
2390int block_commit_write(struct page *page, unsigned from, unsigned to)
2391{
2392 struct inode *inode = page->mapping->host;
2393 __block_commit_write(inode,page,from,to);
2394 return 0;
2395}
2396EXPORT_SYMBOL(block_commit_write);

/*
 * block_page_mkwrite() is not allowed to change the file size as it gets
 * called from a page fault handler when a page is first dirtied.  Hence we
 * must be careful to check for EOF conditions here.  We set the page up
 * correctly for a written page which means we get ENOSPC checking when
 * writing into holes and correct delalloc and unwritten extent mapping on
 * filesystems that support these features.
 *
 * We are not allowed to take the i_mutex here so we have to play games to
 * protect against truncate races as the page could now be beyond EOF.  Because
 * truncate writes the inode size before removing pages, once we have the
 * page lock we can determine safely if the page is beyond EOF.  If it is not
 * beyond EOF, then the page is guaranteed safe against truncation until we
 * unlock the page.
 *
 * Direct callers of this function should protect against filesystem freezing
 * using sb_start_pagefault() - sb_end_pagefault() functions.
 */
2416int block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
2417 get_block_t get_block)
2418{
2419 struct page *page = vmf->page;
2420 struct inode *inode = file_inode(vma->vm_file);
2421 unsigned long end;
2422 loff_t size;
2423 int ret;
2424
2425 lock_page(page);
2426 size = i_size_read(inode);
2427 if ((page->mapping != inode->i_mapping) ||
2428 (page_offset(page) > size)) {
2429
2430 ret = -EFAULT;
2431 goto out_unlock;
2432 }
2433
2434
2435 if (((page->index + 1) << PAGE_CACHE_SHIFT) > size)
2436 end = size & ~PAGE_CACHE_MASK;
2437 else
2438 end = PAGE_CACHE_SIZE;
2439
2440 ret = __block_write_begin(page, 0, end, get_block);
2441 if (!ret)
2442 ret = block_commit_write(page, 0, end);
2443
2444 if (unlikely(ret < 0))
2445 goto out_unlock;
2446 set_page_dirty(page);
2447 wait_for_stable_page(page);
2448 return 0;
2449out_unlock:
2450 unlock_page(page);
2451 return ret;
2452}
2453EXPORT_SYMBOL(block_page_mkwrite);

/*
 * nobh_write_begin()'s prereads are special: the buffer_heads are freed
 * immediately, while under the page lock.  So it needs a special end_io
 * handler which does not touch the bh after unlocking it.
 */
2460static void end_buffer_read_nobh(struct buffer_head *bh, int uptodate)
2461{
2462 __end_buffer_read_notouch(bh, uptodate);
2463}

/*
 * Attach the singly-linked list of buffers created by nobh_write_begin, to
 * the page (converting it to circular linked list and taking care of page
 * dirty races).
 */
2470static void attach_nobh_buffers(struct page *page, struct buffer_head *head)
2471{
2472 struct buffer_head *bh;
2473
2474 BUG_ON(!PageLocked(page));
2475
2476 spin_lock(&page->mapping->private_lock);
2477 bh = head;
2478 do {
2479 if (PageDirty(page))
2480 set_buffer_dirty(bh);
2481 if (!bh->b_this_page)
2482 bh->b_this_page = head;
2483 bh = bh->b_this_page;
2484 } while (bh != head);
2485 attach_page_buffers(page, head);
2486 spin_unlock(&page->mapping->private_lock);
2487}

/*
 * On entry, the page is fully not uptodate.
 * On exit the page is fully uptodate in the areas outside (from,to)
 * The filesystem needs to handle block truncation upon failure.
 */
2494int nobh_write_begin(struct address_space *mapping,
2495 loff_t pos, unsigned len, unsigned flags,
2496 struct page **pagep, void **fsdata,
2497 get_block_t *get_block)
2498{
2499 struct inode *inode = mapping->host;
2500 const unsigned blkbits = inode->i_blkbits;
2501 const unsigned blocksize = 1 << blkbits;
2502 struct buffer_head *head, *bh;
2503 struct page *page;
2504 pgoff_t index;
2505 unsigned from, to;
2506 unsigned block_in_page;
2507 unsigned block_start, block_end;
2508 sector_t block_in_file;
2509 int nr_reads = 0;
2510 int ret = 0;
2511 int is_mapped_to_disk = 1;
2512
2513 index = pos >> PAGE_CACHE_SHIFT;
2514 from = pos & (PAGE_CACHE_SIZE - 1);
2515 to = from + len;
2516
2517 page = grab_cache_page_write_begin(mapping, index, flags);
2518 if (!page)
2519 return -ENOMEM;
2520 *pagep = page;
2521 *fsdata = NULL;
2522
2523 if (page_has_buffers(page)) {
2524 ret = __block_write_begin(page, pos, len, get_block);
2525 if (unlikely(ret))
2526 goto out_release;
2527 return ret;
2528 }
2529
2530 if (PageMappedToDisk(page))
2531 return 0;

	/*
	 * Allocate buffers so that we can keep track of state, and potentially
	 * attach them to the page if an error occurs. In the common case of
	 * no error, they will just be freed again without ever being attached
	 * to the page (which is all OK, because we're under the page lock).
	 *
	 * Be careful: the buffer linked list is a NULL terminated one, rather
	 * than the circular one we're used to.
	 */
2542 head = alloc_page_buffers(page, blocksize, 0);
2543 if (!head) {
2544 ret = -ENOMEM;
2545 goto out_release;
2546 }
2547
2548 block_in_file = (sector_t)page->index << (PAGE_CACHE_SHIFT - blkbits);

	/*
	 * We loop across all blocks in the page, whether or not they are
	 * part of the affected region.  This is so we can discover if the
	 * page is fully mapped-to-disk.
	 */
2555 for (block_start = 0, block_in_page = 0, bh = head;
2556 block_start < PAGE_CACHE_SIZE;
2557 block_in_page++, block_start += blocksize, bh = bh->b_this_page) {
2558 int create;
2559
2560 block_end = block_start + blocksize;
2561 bh->b_state = 0;
2562 create = 1;
2563 if (block_start >= to)
2564 create = 0;
2565 ret = get_block(inode, block_in_file + block_in_page,
2566 bh, create);
2567 if (ret)
2568 goto failed;
2569 if (!buffer_mapped(bh))
2570 is_mapped_to_disk = 0;
2571 if (buffer_new(bh))
2572 unmap_underlying_metadata(bh->b_bdev, bh->b_blocknr);
2573 if (PageUptodate(page)) {
2574 set_buffer_uptodate(bh);
2575 continue;
2576 }
2577 if (buffer_new(bh) || !buffer_mapped(bh)) {
2578 zero_user_segments(page, block_start, from,
2579 to, block_end);
2580 continue;
2581 }
2582 if (buffer_uptodate(bh))
2583 continue;
2584 if (block_start < from || block_end > to) {
2585 lock_buffer(bh);
2586 bh->b_end_io = end_buffer_read_nobh;
2587 submit_bh(READ, bh);
2588 nr_reads++;
2589 }
2590 }
2591
2592 if (nr_reads) {
2593
2594
2595
2596
2597
2598 for (bh = head; bh; bh = bh->b_this_page) {
2599 wait_on_buffer(bh);
2600 if (!buffer_uptodate(bh))
2601 ret = -EIO;
2602 }
2603 if (ret)
2604 goto failed;
2605 }
2606
2607 if (is_mapped_to_disk)
2608 SetPageMappedToDisk(page);
2609
2610 *fsdata = head;
2611
2612 return 0;
2613
2614failed:
2615 BUG_ON(!ret);

	/*
	 * Error recovery is a bit difficult. We need to zero out blocks that
	 * were newly allocated, and dirty them to ensure they get written out.
	 * Buffers need to be attached to the page at this point, otherwise
	 * the handling of potential IO errors during writeout would be hard
	 * (could try doing synchronous writeout, but what if that fails too?)
	 */
2623 attach_nobh_buffers(page, head);
2624 page_zero_new_buffers(page, from, to);
2625
2626out_release:
2627 unlock_page(page);
2628 page_cache_release(page);
2629 *pagep = NULL;
2630
2631 return ret;
2632}
2633EXPORT_SYMBOL(nobh_write_begin);
2634
2635int nobh_write_end(struct file *file, struct address_space *mapping,
2636 loff_t pos, unsigned len, unsigned copied,
2637 struct page *page, void *fsdata)
2638{
2639 struct inode *inode = page->mapping->host;
2640 struct buffer_head *head = fsdata;
2641 struct buffer_head *bh;
2642 BUG_ON(fsdata != NULL && page_has_buffers(page));
2643
2644 if (unlikely(copied < len) && head)
2645 attach_nobh_buffers(page, head);
2646 if (page_has_buffers(page))
2647 return generic_write_end(file, mapping, pos, len,
2648 copied, page, fsdata);
2649
2650 SetPageUptodate(page);
2651 set_page_dirty(page);
2652 if (pos+copied > inode->i_size) {
2653 i_size_write(inode, pos+copied);
2654 mark_inode_dirty(inode);
2655 }
2656
2657 unlock_page(page);
2658 page_cache_release(page);
2659
2660 while (head) {
2661 bh = head;
2662 head = head->b_this_page;
2663 free_buffer_head(bh);
2664 }
2665
2666 return copied;
2667}
2668EXPORT_SYMBOL(nobh_write_end);

/*
 * nobh_writepage() - based on block_write_full_page() except
 * that it tries to operate without attaching bufferheads to
 * the page.
 */
2675int nobh_writepage(struct page *page, get_block_t *get_block,
2676 struct writeback_control *wbc)
2677{
2678 struct inode * const inode = page->mapping->host;
2679 loff_t i_size = i_size_read(inode);
2680 const pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT;
2681 unsigned offset;
2682 int ret;
2683
2684
2685 if (page->index < end_index)
2686 goto out;
2687
2688
2689 offset = i_size & (PAGE_CACHE_SIZE-1);
2690 if (page->index >= end_index+1 || !offset) {
2691
2692
2693
2694
2695
2696#if 0
2697
2698 if (page->mapping->a_ops->invalidatepage)
2699 page->mapping->a_ops->invalidatepage(page, offset);
2700#endif
2701 unlock_page(page);
2702 return 0;
2703 }

	/*
	 * The page straddles i_size.  It must be zeroed out on each and every
	 * writepage invocation because it may be mmapped.  "A file is mapped
	 * in multiples of the page size.  For a file that is not a multiple of
	 * the page size, the remaining memory is zeroed when mapped, and
	 * writes to that region are not written out to the file."
	 */
2712 zero_user_segment(page, offset, PAGE_CACHE_SIZE);
2713out:
2714 ret = mpage_writepage(page, get_block, wbc);
2715 if (ret == -EAGAIN)
2716 ret = __block_write_full_page(inode, page, get_block, wbc,
2717 end_buffer_async_write);
2718 return ret;
2719}
2720EXPORT_SYMBOL(nobh_writepage);
2721
2722int nobh_truncate_page(struct address_space *mapping,
2723 loff_t from, get_block_t *get_block)
2724{
2725 pgoff_t index = from >> PAGE_CACHE_SHIFT;
2726 unsigned offset = from & (PAGE_CACHE_SIZE-1);
2727 unsigned blocksize;
2728 sector_t iblock;
2729 unsigned length, pos;
2730 struct inode *inode = mapping->host;
2731 struct page *page;
2732 struct buffer_head map_bh;
2733 int err;
2734
2735 blocksize = 1 << inode->i_blkbits;
2736 length = offset & (blocksize - 1);
2737
2738
2739 if (!length)
2740 return 0;
2741
2742 length = blocksize - length;
2743 iblock = (sector_t)index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
2744
2745 page = grab_cache_page(mapping, index);
2746 err = -ENOMEM;
2747 if (!page)
2748 goto out;
2749
2750 if (page_has_buffers(page)) {
2751has_buffers:
2752 unlock_page(page);
2753 page_cache_release(page);
2754 return block_truncate_page(mapping, from, get_block);
2755 }
2756
2757
2758 pos = blocksize;
2759 while (offset >= pos) {
2760 iblock++;
2761 pos += blocksize;
2762 }
2763
2764 map_bh.b_size = blocksize;
2765 map_bh.b_state = 0;
2766 err = get_block(inode, iblock, &map_bh, 0);
2767 if (err)
2768 goto unlock;
2769
2770 if (!buffer_mapped(&map_bh))
2771 goto unlock;
2772
2773
2774 if (!PageUptodate(page)) {
2775 err = mapping->a_ops->readpage(NULL, page);
2776 if (err) {
2777 page_cache_release(page);
2778 goto out;
2779 }
2780 lock_page(page);
2781 if (!PageUptodate(page)) {
2782 err = -EIO;
2783 goto unlock;
2784 }
2785 if (page_has_buffers(page))
2786 goto has_buffers;
2787 }
2788 zero_user(page, offset, length);
2789 set_page_dirty(page);
2790 err = 0;
2791
2792unlock:
2793 unlock_page(page);
2794 page_cache_release(page);
2795out:
2796 return err;
2797}
2798EXPORT_SYMBOL(nobh_truncate_page);
2799
2800int block_truncate_page(struct address_space *mapping,
2801 loff_t from, get_block_t *get_block)
2802{
2803 pgoff_t index = from >> PAGE_CACHE_SHIFT;
2804 unsigned offset = from & (PAGE_CACHE_SIZE-1);
2805 unsigned blocksize;
2806 sector_t iblock;
2807 unsigned length, pos;
2808 struct inode *inode = mapping->host;
2809 struct page *page;
2810 struct buffer_head *bh;
2811 int err;
2812
2813 blocksize = 1 << inode->i_blkbits;
2814 length = offset & (blocksize - 1);
2815
2816
2817 if (!length)
2818 return 0;
2819
2820 length = blocksize - length;
2821 iblock = (sector_t)index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
2822
2823 page = grab_cache_page(mapping, index);
2824 err = -ENOMEM;
2825 if (!page)
2826 goto out;
2827
2828 if (!page_has_buffers(page))
2829 create_empty_buffers(page, blocksize, 0);
2830
2831
2832 bh = page_buffers(page);
2833 pos = blocksize;
2834 while (offset >= pos) {
2835 bh = bh->b_this_page;
2836 iblock++;
2837 pos += blocksize;
2838 }
2839
2840 err = 0;
2841 if (!buffer_mapped(bh)) {
2842 WARN_ON(bh->b_size != blocksize);
2843 err = get_block(inode, iblock, bh, 0);
2844 if (err)
2845 goto unlock;
2846
2847 if (!buffer_mapped(bh))
2848 goto unlock;
2849 }
2850
2851
2852 if (PageUptodate(page))
2853 set_buffer_uptodate(bh);
2854
2855 if (!buffer_uptodate(bh) && !buffer_delay(bh) && !buffer_unwritten(bh)) {
2856 err = -EIO;
2857 ll_rw_block(READ, 1, &bh);
2858 wait_on_buffer(bh);
2859
2860 if (!buffer_uptodate(bh))
2861 goto unlock;
2862 }
2863
2864 zero_user(page, offset, length);
2865 mark_buffer_dirty(bh);
2866 err = 0;
2867
2868unlock:
2869 unlock_page(page);
2870 page_cache_release(page);
2871out:
2872 return err;
2873}
2874EXPORT_SYMBOL(block_truncate_page);

/*
 * The generic ->writepage function for buffer-backed address_spaces
 */
int block_write_full_page(struct page *page, get_block_t *get_block,
			struct writeback_control *wbc)
{
	struct inode * const inode = page->mapping->host;
	loff_t i_size = i_size_read(inode);
	const pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT;
	unsigned offset;

	/* Is the page fully inside i_size? */
	if (page->index < end_index)
		return __block_write_full_page(inode, page, get_block, wbc,
					       end_buffer_async_write);

	/* Is the page fully outside i_size? (truncate in progress) */
	offset = i_size & (PAGE_CACHE_SIZE-1);
	if (page->index >= end_index+1 || !offset) {
		/*
		 * The page may have dirty, unmapped buffers.  For example,
		 * they may have been added in ext3_writepage().  Make them
		 * freeable here, so the page does not leak.
		 */
		do_invalidatepage(page, 0, PAGE_CACHE_SIZE);
		unlock_page(page);
		return 0;
	}

	/*
	 * The page straddles i_size.  It must be zeroed out on each and every
	 * writepage invocation because it may be mmapped.  "A file is mapped
	 * in multiples of the page size.  For a file that is not a multiple of
	 * the page size, the remaining memory is zeroed when mapped, and
	 * writes to that region are not written out to the file."
	 */
	zero_user_segment(page, offset, PAGE_CACHE_SIZE);
	return __block_write_full_page(inode, page, get_block, wbc,
							end_buffer_async_write);
}
EXPORT_SYMBOL(block_write_full_page);

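/*
 * generic_block_bmap() implements the ->bmap() address_space operation (as
 * used by the FIBMAP ioctl) on top of the filesystem's get_block callback:
 * map a file-relative block to a device block number, with holes mapping
 * to 0.
 */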
sector_t generic_block_bmap(struct address_space *mapping, sector_t block,
			    get_block_t *get_block)
{
	struct buffer_head tmp;
	struct inode *inode = mapping->host;
	tmp.b_state = 0;
	tmp.b_blocknr = 0;
	tmp.b_size = 1 << inode->i_blkbits;
	get_block(inode, block, &tmp, 0);
	return tmp.b_blocknr;
}
EXPORT_SYMBOL(generic_block_bmap);

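/*
 * bio completion handler for buffer_head based I/O: propagate the BIO_QUIET
 * flag and the completion status to the buffer's b_end_io, then drop the bio.
 */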
static void end_bio_bh_io_sync(struct bio *bio)
{
	struct buffer_head *bh = bio->bi_private;

	if (unlikely(bio_flagged(bio, BIO_QUIET)))
		set_bit(BH_Quiet, &bh->b_state);

	bh->b_end_io(bh, !bio->bi_error);
	bio_put(bio);
}

/*
 * This allows us to do IO even on the odd last sectors of a device, even if
 * the block size is some multiple of the physical sector size.
 *
 * We'll just truncate the bio to the size of the device, and clear the end
 * of the buffer head manually.
 *
 * Truly out-of-range accesses will turn into actual I/O errors; this only
 * handles the "we need to be able to do I/O at the final sector" case.
 */
void guard_bio_eod(int rw, struct bio *bio)
{
	sector_t maxsector;
	struct bio_vec *bvec = &bio->bi_io_vec[bio->bi_vcnt - 1];
	unsigned truncated_bytes;

	maxsector = i_size_read(bio->bi_bdev->bd_inode) >> 9;
	if (!maxsector)
		return;

	/*
	 * If the *whole* IO is past the end of the device,
	 * let it through, and the IO layer will turn it into
	 * an EIO.
	 */
	if (unlikely(bio->bi_iter.bi_sector >= maxsector))
		return;

	maxsector -= bio->bi_iter.bi_sector;
	if (likely((bio->bi_iter.bi_size >> 9) <= maxsector))
		return;

	/* Uhhuh. We've got a bio that straddles the device size! */
	truncated_bytes = bio->bi_iter.bi_size - (maxsector << 9);

	/* Truncate the bio.. */
	bio->bi_iter.bi_size -= truncated_bytes;
	bvec->bv_len -= truncated_bytes;

	/* ..and clear the end of the buffer for reads */
	if ((rw & RW_MASK) == READ) {
		zero_user(bvec->bv_page, bvec->bv_offset + bvec->bv_len,
				truncated_bytes);
	}
}

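/*
 * Build a single-segment bio for @bh and submit it.  @wbc, when non-NULL,
 * is used for cgroup writeback accounting.  The buffer must be locked and
 * mapped with b_end_io set; completion is routed through
 * end_bio_bh_io_sync().
 */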
static int submit_bh_wbc(int rw, struct buffer_head *bh,
			 unsigned long bio_flags, struct writeback_control *wbc)
{
	struct bio *bio;

	BUG_ON(!buffer_locked(bh));
	BUG_ON(!buffer_mapped(bh));
	BUG_ON(!bh->b_end_io);
	BUG_ON(buffer_delay(bh));
	BUG_ON(buffer_unwritten(bh));

	/*
	 * Only clear out a write error when rewriting
	 */
	if (test_set_buffer_req(bh) && (rw & WRITE))
		clear_buffer_write_io_error(bh);

	/*
	 * from here on down, it's all bio -- do the initial mapping,
	 * submit_bio -> generic_make_request may further map this bio around
	 */
	bio = bio_alloc(GFP_NOIO, 1);

	if (wbc) {
		wbc_init_bio(wbc, bio);
		wbc_account_io(wbc, bh->b_page, bh->b_size);
	}

	bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9);
	bio->bi_bdev = bh->b_bdev;

	bio_add_page(bio, bh->b_page, bh->b_size, bh_offset(bh));
	BUG_ON(bio->bi_iter.bi_size != bh->b_size);

	bio->bi_end_io = end_bio_bh_io_sync;
	bio->bi_private = bh;
	bio->bi_flags |= bio_flags;

	/* Take care of bh's that straddle the end of the device */
	guard_bio_eod(rw, bio);

	if (buffer_meta(bh))
		rw |= REQ_META;
	if (buffer_prio(bh))
		rw |= REQ_PRIO;

	submit_bio(rw, bio);
	return 0;
}

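/*
 * Public wrappers around submit_bh_wbc(): _submit_bh() lets the caller pass
 * extra bio flags, submit_bh() is the common case with no flags and no wbc.
 */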
int _submit_bh(int rw, struct buffer_head *bh, unsigned long bio_flags)
{
	return submit_bh_wbc(rw, bh, bio_flags, NULL);
}
EXPORT_SYMBOL_GPL(_submit_bh);

int submit_bh(int rw, struct buffer_head *bh)
{
	return submit_bh_wbc(rw, bh, 0, NULL);
}
EXPORT_SYMBOL(submit_bh);

/**
 * ll_rw_block: low-level access to block devices (DEPRECATED)
 * @rw: whether to %READ or %WRITE or maybe %READA (readahead)
 * @nr: number of &struct buffer_heads in the array
 * @bhs: array of pointers to &struct buffer_head
 *
 * ll_rw_block() takes an array of pointers to &struct buffer_heads, and
 * requests an I/O operation on them, either a %READ or a %WRITE.  The third
 * %READA option is described in the documentation for generic_make_request()
 * which ll_rw_block() calls.
 *
 * This function drops any buffer that it cannot get a lock on (with the
 * BH_Lock state bit), any buffer that appears to be clean when doing a write
 * request, and any buffer that appears to be up-to-date when doing read
 * request.  Further it marks as clean buffers that are processed for
 * writing (the buffer cache won't assume that they are actually clean
 * until the buffer gets unlocked).
 *
 * ll_rw_block sets b_end_io to simple completion handler that marks
 * the buffer up-to-date (if appropriate), unlocks the buffer and wakes
 * any waiters.
 *
 * All of the buffers must be for the same device, and must also be a
 * multiple of the current approved size for the device.
 */
void ll_rw_block(int rw, int nr, struct buffer_head *bhs[])
{
	int i;

	for (i = 0; i < nr; i++) {
		struct buffer_head *bh = bhs[i];

		if (!trylock_buffer(bh))
			continue;
		if (rw == WRITE) {
			if (test_clear_buffer_dirty(bh)) {
				bh->b_end_io = end_buffer_write_sync;
				get_bh(bh);
				submit_bh(WRITE, bh);
				continue;
			}
		} else {
			if (!buffer_uptodate(bh)) {
				bh->b_end_io = end_buffer_read_sync;
				get_bh(bh);
				submit_bh(rw, bh);
				continue;
			}
		}
		unlock_buffer(bh);
	}
}
EXPORT_SYMBOL(ll_rw_block);

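/*
 * Start an asynchronous write of @bh if it is dirty: lock it, clear the
 * dirty bit and submit it with the requested @rw.  The caller does not wait
 * for completion; the reference taken here is dropped by the end_io handler.
 */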
void write_dirty_buffer(struct buffer_head *bh, int rw)
{
	lock_buffer(bh);
	if (!test_clear_buffer_dirty(bh)) {
		unlock_buffer(bh);
		return;
	}
	bh->b_end_io = end_buffer_write_sync;
	get_bh(bh);
	submit_bh(rw, bh);
}
EXPORT_SYMBOL(write_dirty_buffer);

/*
 * For a data-integrity writeout, we need to wait upon any in-progress I/O
 * and then start new I/O and then wait upon it.  The caller must have a ref
 * on the buffer_head.
 */
int __sync_dirty_buffer(struct buffer_head *bh, int rw)
{
	int ret = 0;

	WARN_ON(atomic_read(&bh->b_count) < 1);
	lock_buffer(bh);
	if (test_clear_buffer_dirty(bh)) {
		get_bh(bh);
		bh->b_end_io = end_buffer_write_sync;
		ret = submit_bh(rw, bh);
		wait_on_buffer(bh);
		if (!ret && !buffer_uptodate(bh))
			ret = -EIO;
	} else {
		unlock_buffer(bh);
	}
	return ret;
}
EXPORT_SYMBOL(__sync_dirty_buffer);

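/*
 * sync_dirty_buffer() is the common data-integrity case: write the buffer
 * with WRITE_SYNC and wait for it.  Illustrative caller-side sketch (not
 * taken from this file):
 *
 *	bh = sb_bread(sb, block);
 *	... modify bh->b_data ...
 *	mark_buffer_dirty(bh);
 *	err = sync_dirty_buffer(bh);
 *	brelse(bh);
 */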
int sync_dirty_buffer(struct buffer_head *bh)
{
	return __sync_dirty_buffer(bh, WRITE_SYNC);
}
EXPORT_SYMBOL(sync_dirty_buffer);

/*
 * try_to_free_buffers() checks if all the buffers on this particular page
 * are unused, and releases them if so.
 *
 * Exclusion against try_to_free_buffers may be obtained by either
 * locking the page or by holding its mapping's private_lock.
 *
 * If the page is dirty but all the buffers are clean then we need to
 * be sure to mark the page clean as well.  This is because the page
 * may be against a block device, and a later reattachment of buffers
 * to a dirty page will set *all* buffers dirty.  Which would corrupt
 * filesystem data on the same device.
 *
 * The same applies to regular filesystem pages: if all the buffers are
 * clean then we set the page clean and proceed.  To do that, we require
 * total exclusion from __set_page_dirty_buffers().  That is obtained with
 * private_lock.
 *
 * try_to_free_buffers() is non-blocking.
 */
static inline int buffer_busy(struct buffer_head *bh)
{
	return atomic_read(&bh->b_count) |
		(bh->b_state & ((1 << BH_Dirty) | (1 << BH_Lock)));
}

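/*
 * Detach every buffer from @page if none of them is busy.  Returns 1 and
 * hands the buffer ring back through @buffers_to_free on success, 0 if any
 * buffer is still in use.  Write errors are propagated to the mapping.
 */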
static int
drop_buffers(struct page *page, struct buffer_head **buffers_to_free)
{
	struct buffer_head *head = page_buffers(page);
	struct buffer_head *bh;

	bh = head;
	do {
		if (buffer_write_io_error(bh) && page->mapping)
			set_bit(AS_EIO, &page->mapping->flags);
		if (buffer_busy(bh))
			goto failed;
		bh = bh->b_this_page;
	} while (bh != head);

	do {
		struct buffer_head *next = bh->b_this_page;

		if (bh->b_assoc_map)
			__remove_assoc_queue(bh);
		bh = next;
	} while (bh != head);
	*buffers_to_free = head;
	__clear_page_buffers(page);
	return 1;
failed:
	return 0;
}

int try_to_free_buffers(struct page *page)
{
	struct address_space * const mapping = page->mapping;
	struct buffer_head *buffers_to_free = NULL;
	int ret = 0;

	BUG_ON(!PageLocked(page));
	if (PageWriteback(page))
		return 0;

	if (mapping == NULL) {
		ret = drop_buffers(page, &buffers_to_free);
		goto out;
	}

	spin_lock(&mapping->private_lock);
	ret = drop_buffers(page, &buffers_to_free);

	/*
	 * If the filesystem writes its buffers by hand (eg ext3)
	 * then we can have clean buffers against a dirty page.  We
	 * clean the page here; otherwise the VM will never notice
	 * that the filesystem did any IO at all.
	 *
	 * Also, during truncate, discard_buffer will have marked all
	 * the page's buffers clean.  We discover that here and clean
	 * the page also.
	 *
	 * private_lock must be held over this entire operation in order
	 * to synchronise against __set_page_dirty_buffers and prevent the
	 * dirty bit from being lost.
	 */
	if (ret)
		cancel_dirty_page(page);
	spin_unlock(&mapping->private_lock);
out:
	if (buffers_to_free) {
		struct buffer_head *bh = buffers_to_free;

		do {
			struct buffer_head *next = bh->b_this_page;
			free_buffer_head(bh);
			bh = next;
		} while (bh != buffers_to_free);
	}
	return ret;
}
EXPORT_SYMBOL(try_to_free_buffers);

/*
 * There are no bdflush tunables left.  But distributions are
 * still running obsolete flush daemons, so we terminate them here.
 *
 * Use of bdflush() is deprecated and will be removed in a future kernel.
 * The `flush-X' kernel threads fully replace bdflush daemons and this call.
 */
SYSCALL_DEFINE2(bdflush, int, func, long, data)
{
	static int msg_count;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	if (msg_count < 5) {
		msg_count++;
		printk(KERN_INFO
			"warning: process `%s' used the obsolete bdflush"
			" system call\n", current->comm);
		printk(KERN_INFO "Fix your initscripts?\n");
	}

	if (func == 1)
		do_exit(0);
	return 0;
}

/*
 * Buffer-head allocation
 */
static struct kmem_cache *bh_cachep __read_mostly;

/*
 * Once the number of bh's in the machine exceeds this level, we start
 * stripping them in writeback.
 */
static unsigned long max_buffer_heads;

int buffer_heads_over_limit;

struct bh_accounting {
	int nr;			/* Number of live bh's */
	int ratelimit;		/* Limit cacheline bouncing */
};

static DEFINE_PER_CPU(struct bh_accounting, bh_accounting) = {0, 0};

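/*
 * Recompute buffer_heads_over_limit from the per-CPU counters, rate limited
 * so that the summation over all online CPUs happens at most once every
 * 4096 allocations/frees on the local CPU.
 */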
static void recalc_bh_state(void)
{
	int i;
	int tot = 0;

	if (__this_cpu_inc_return(bh_accounting.ratelimit) - 1 < 4096)
		return;
	__this_cpu_write(bh_accounting.ratelimit, 0);
	for_each_online_cpu(i)
		tot += per_cpu(bh_accounting, i).nr;
	buffer_heads_over_limit = (tot > max_buffer_heads);
}

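/*
 * Slab allocation and freeing of buffer_heads, with per-CPU accounting so
 * that writeback can tell when the system holds too many of them.
 */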
struct buffer_head *alloc_buffer_head(gfp_t gfp_flags)
{
	struct buffer_head *ret = kmem_cache_zalloc(bh_cachep, gfp_flags);
	if (ret) {
		INIT_LIST_HEAD(&ret->b_assoc_buffers);
		preempt_disable();
		__this_cpu_inc(bh_accounting.nr);
		recalc_bh_state();
		preempt_enable();
	}
	return ret;
}
EXPORT_SYMBOL(alloc_buffer_head);

void free_buffer_head(struct buffer_head *bh)
{
	BUG_ON(!list_empty(&bh->b_assoc_buffers));
	kmem_cache_free(bh_cachep, bh);
	preempt_disable();
	__this_cpu_dec(bh_accounting.nr);
	recalc_bh_state();
	preempt_enable();
}
EXPORT_SYMBOL(free_buffer_head);

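/*
 * CPU hotplug: when a CPU dies, release the buffer_heads cached in its
 * per-CPU LRU and fold its accounting into the current CPU.
 */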
static void buffer_exit_cpu(int cpu)
{
	int i;
	struct bh_lru *b = &per_cpu(bh_lrus, cpu);

	for (i = 0; i < BH_LRU_SIZE; i++) {
		brelse(b->bhs[i]);
		b->bhs[i] = NULL;
	}
	this_cpu_add(bh_accounting.nr, per_cpu(bh_accounting, cpu).nr);
	per_cpu(bh_accounting, cpu).nr = 0;
}

static int buffer_cpu_notify(struct notifier_block *self,
			      unsigned long action, void *hcpu)
{
	if (action == CPU_DEAD || action == CPU_DEAD_FROZEN)
		buffer_exit_cpu((unsigned long)hcpu);
	return NOTIFY_OK;
}

/**
 * bh_uptodate_or_lock - Test whether the buffer is uptodate
 * @bh: struct buffer_head
 *
 * Return true if the buffer is up-to-date and false,
 * with the buffer locked, if not.
 */
int bh_uptodate_or_lock(struct buffer_head *bh)
{
	if (!buffer_uptodate(bh)) {
		lock_buffer(bh);
		if (!buffer_uptodate(bh))
			return 0;
		unlock_buffer(bh);
	}
	return 1;
}
EXPORT_SYMBOL(bh_uptodate_or_lock);

/**
 * bh_submit_read - Submit a locked buffer for reading
 * @bh: struct buffer_head
 *
 * Returns zero on success and -EIO on error.
 */
int bh_submit_read(struct buffer_head *bh)
{
	BUG_ON(!buffer_locked(bh));

	if (buffer_uptodate(bh)) {
		unlock_buffer(bh);
		return 0;
	}

	get_bh(bh);
	bh->b_end_io = end_buffer_read_sync;
	submit_bh(READ, bh);
	wait_on_buffer(bh);
	if (buffer_uptodate(bh))
		return 0;
	return -EIO;
}
EXPORT_SYMBOL(bh_submit_read);

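/*
 * Called once at boot: create the buffer_head slab cache, size the global
 * buffer_head limit and register the CPU hotplug notifier.
 */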
void __init buffer_init(void)
{
	unsigned long nrpages;

	bh_cachep = kmem_cache_create("buffer_head",
			sizeof(struct buffer_head), 0,
				(SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|
				SLAB_MEM_SPREAD),
				NULL);

	/*
	 * Limit the bh occupancy to 10% of ZONE_NORMAL
	 */
	nrpages = (nr_free_buffer_pages() * 10) / 100;
	max_buffer_heads = nrpages * (PAGE_SIZE / sizeof(struct buffer_head));
	hotcpu_notifier(buffer_cpu_notify, 0);
}