/*
 *  linux/fs/buffer.c
 *
 *  Generic buffer_head handling for block-based filesystems.
 */

#include <linux/kernel.h>
#include <linux/syscalls.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/percpu.h>
#include <linux/slab.h>
#include <linux/capability.h>
#include <linux/blkdev.h>
#include <linux/file.h>
#include <linux/quotaops.h>
#include <linux/highmem.h>
#include <linux/export.h>
#include <linux/writeback.h>
#include <linux/hash.h>
#include <linux/suspend.h>
#include <linux/buffer_head.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/bio.h>
#include <linux/notifier.h>
#include <linux/cpu.h>
#include <linux/bitops.h>
#include <linux/mpage.h>
#include <linux/bit_spinlock.h>
#include <trace/events/block.h>

static int fsync_buffers_list(spinlock_t *lock, struct list_head *list);

#define BH_ENTRY(list) list_entry((list), struct buffer_head, b_assoc_buffers)

void init_buffer(struct buffer_head *bh, bh_end_io_t *handler, void *private)
{
	bh->b_end_io = handler;
	bh->b_private = private;
}
EXPORT_SYMBOL(init_buffer);

inline void touch_buffer(struct buffer_head *bh)
{
	trace_block_touch_buffer(bh);
	mark_page_accessed(bh->b_page);
}
EXPORT_SYMBOL(touch_buffer);

static int sleep_on_buffer(void *word)
{
	io_schedule();
	return 0;
}

void __lock_buffer(struct buffer_head *bh)
{
	wait_on_bit_lock(&bh->b_state, BH_Lock, sleep_on_buffer,
							TASK_UNINTERRUPTIBLE);
}
EXPORT_SYMBOL(__lock_buffer);

void unlock_buffer(struct buffer_head *bh)
{
	clear_bit_unlock(BH_Lock, &bh->b_state);
	smp_mb__after_atomic();
	wake_up_bit(&bh->b_state, BH_Lock);
}
EXPORT_SYMBOL(unlock_buffer);
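
/*
 * Illustrative sketch, not part of the original file: the usual caller
 * pattern for the locking helpers above.  lock_buffer()/unlock_buffer()
 * serialise access to the buffer contents against in-flight I/O.  The
 * function below is hypothetical, assumes a lowmem buffer so b_data can
 * be dereferenced directly, and is compiled out with #if 0.
 */
#if 0
static void example_overwrite_buffer(struct buffer_head *bh)
{
	lock_buffer(bh);		/* may sleep in __lock_buffer() */
	memset(bh->b_data, 0, bh->b_size);
	set_buffer_uptodate(bh);
	unlock_buffer(bh);		/* wakes sleepers in __wait_on_buffer() */
}
#endif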

/*
 * Returns whether the page has dirty or writeback buffers.  If all the
 * buffers are unlocked and clean then the PageDirty information is stale.
 * If any of the buffers are locked, it is assumed they are locked for I/O.
 */
90void buffer_check_dirty_writeback(struct page *page,
91 bool *dirty, bool *writeback)
92{
93 struct buffer_head *head, *bh;
94 *dirty = false;
95 *writeback = false;
96
97 BUG_ON(!PageLocked(page));
98
99 if (!page_has_buffers(page))
100 return;
101
102 if (PageWriteback(page))
103 *writeback = true;
104
105 head = page_buffers(page);
106 bh = head;
107 do {
108 if (buffer_locked(bh))
109 *writeback = true;
110
111 if (buffer_dirty(bh))
112 *dirty = true;
113
114 bh = bh->b_this_page;
115 } while (bh != head);
116}
117EXPORT_SYMBOL(buffer_check_dirty_writeback);

/*
 * Block until a buffer comes unlocked.  This doesn't stop it from being
 * locked again afterwards, and it doesn't pin the buffer either, so the
 * caller must hold a reference to keep the buffer_head alive.
 */
124void __wait_on_buffer(struct buffer_head * bh)
125{
126 wait_on_bit(&bh->b_state, BH_Lock, sleep_on_buffer, TASK_UNINTERRUPTIBLE);
127}
128EXPORT_SYMBOL(__wait_on_buffer);
129
130static void
131__clear_page_buffers(struct page *page)
132{
133 ClearPagePrivate(page);
134 set_page_private(page, 0);
135 page_cache_release(page);
136}
137
138
139static int quiet_error(struct buffer_head *bh)
140{
141 if (!test_bit(BH_Quiet, &bh->b_state) && printk_ratelimit())
142 return 0;
143 return 1;
144}
145
146
147static void buffer_io_error(struct buffer_head *bh)
148{
149 char b[BDEVNAME_SIZE];
150 printk(KERN_ERR "Buffer I/O error on device %s, logical block %Lu\n",
151 bdevname(bh->b_bdev, b),
152 (unsigned long long)bh->b_blocknr);
153}

/*
 * End-of-I/O handler helper which sets (or clears) the uptodate flag and
 * unlocks the buffer, without touching the bh afterwards - which matters
 * for callers (like the nobh path below) that free the buffer_head as soon
 * as it is unlocked.
 */
163static void __end_buffer_read_notouch(struct buffer_head *bh, int uptodate)
164{
165 if (uptodate) {
166 set_buffer_uptodate(bh);
167 } else {
168
169 clear_buffer_uptodate(bh);
170 }
171 unlock_buffer(bh);
172}

/*
 * Default synchronous end-of-I/O handler: propagate the I/O result into the
 * buffer state, unlock it and drop the reference taken at submission time.
 */
178void end_buffer_read_sync(struct buffer_head *bh, int uptodate)
179{
180 __end_buffer_read_notouch(bh, uptodate);
181 put_bh(bh);
182}
183EXPORT_SYMBOL(end_buffer_read_sync);
184
185void end_buffer_write_sync(struct buffer_head *bh, int uptodate)
186{
187 char b[BDEVNAME_SIZE];
188
189 if (uptodate) {
190 set_buffer_uptodate(bh);
191 } else {
192 if (!quiet_error(bh)) {
193 buffer_io_error(bh);
194 printk(KERN_WARNING "lost page write due to "
195 "I/O error on %s\n",
196 bdevname(bh->b_bdev, b));
197 }
198 set_buffer_write_io_error(bh);
199 clear_buffer_uptodate(bh);
200 }
201 unlock_buffer(bh);
202 put_bh(bh);
203}
204EXPORT_SYMBOL(end_buffer_write_sync);

/*
 * Look up a block in the block device's page cache.  Various filesystems
 * want __find_get_block() to be non-blocking, but it is the page lock
 * which normally protects a page's buffers; instead of taking it we
 * exclude try_to_free_buffers() via the blockdev mapping's private_lock
 * while walking the page's buffer ring.
 */
217static struct buffer_head *
218__find_get_block_slow(struct block_device *bdev, sector_t block)
219{
220 struct inode *bd_inode = bdev->bd_inode;
221 struct address_space *bd_mapping = bd_inode->i_mapping;
222 struct buffer_head *ret = NULL;
223 pgoff_t index;
224 struct buffer_head *bh;
225 struct buffer_head *head;
226 struct page *page;
227 int all_mapped = 1;
228
229 index = block >> (PAGE_CACHE_SHIFT - bd_inode->i_blkbits);
230 page = find_get_page_flags(bd_mapping, index, FGP_ACCESSED);
231 if (!page)
232 goto out;
233
234 spin_lock(&bd_mapping->private_lock);
235 if (!page_has_buffers(page))
236 goto out_unlock;
237 head = page_buffers(page);
238 bh = head;
239 do {
240 if (!buffer_mapped(bh))
241 all_mapped = 0;
242 else if (bh->b_blocknr == block) {
243 ret = bh;
244 get_bh(bh);
245 goto out_unlock;
246 }
247 bh = bh->b_this_page;
248 } while (bh != head);

	/*
	 * We only get here if every buffer on the page is mapped yet none of
	 * them covers the requested block.  That should never happen and
	 * usually indicates a blocksize mismatch between the filesystem and
	 * the block device, so complain loudly.
	 */
255 if (all_mapped) {
256 char b[BDEVNAME_SIZE];
257
258 printk("__find_get_block_slow() failed. "
259 "block=%llu, b_blocknr=%llu\n",
260 (unsigned long long)block,
261 (unsigned long long)bh->b_blocknr);
262 printk("b_state=0x%08lx, b_size=%zu\n",
263 bh->b_state, bh->b_size);
264 printk("device %s blocksize: %d\n", bdevname(bdev, b),
265 1 << bd_inode->i_blkbits);
266 }
267out_unlock:
268 spin_unlock(&bd_mapping->private_lock);
269 page_cache_release(page);
270out:
271 return ret;
272}
273
274
275
276
277static void free_more_memory(void)
278{
279 struct zone *zone;
280 int nid;
281
282 wakeup_flusher_threads(1024, WB_REASON_FREE_MORE_MEM);
283 yield();
284
285 for_each_online_node(nid) {
286 (void)first_zones_zonelist(node_zonelist(nid, GFP_NOFS),
287 gfp_zone(GFP_NOFS), NULL,
288 &zone);
289 if (zone)
290 try_to_free_pages(node_zonelist(nid, GFP_NOFS), 0,
291 GFP_NOFS, NULL);
292 }
293}

/*
 * I/O completion handler for block_read_full_page() - pages
 * which come unlocked at the end of I/O.
 */
299static void end_buffer_async_read(struct buffer_head *bh, int uptodate)
300{
301 unsigned long flags;
302 struct buffer_head *first;
303 struct buffer_head *tmp;
304 struct page *page;
305 int page_uptodate = 1;
306
307 BUG_ON(!buffer_async_read(bh));
308
309 page = bh->b_page;
310 if (uptodate) {
311 set_buffer_uptodate(bh);
312 } else {
313 clear_buffer_uptodate(bh);
314 if (!quiet_error(bh))
315 buffer_io_error(bh);
316 SetPageError(page);
317 }

	/*
	 * Be _very_ careful from here on.  Bad things can happen if
	 * two buffer heads end I/O at almost the same time and both
	 * decide that the page is now completely done.
	 */
324 first = page_buffers(page);
325 local_irq_save(flags);
326 bit_spin_lock(BH_Uptodate_Lock, &first->b_state);
327 clear_buffer_async_read(bh);
328 unlock_buffer(bh);
329 tmp = bh;
330 do {
331 if (!buffer_uptodate(tmp))
332 page_uptodate = 0;
333 if (buffer_async_read(tmp)) {
334 BUG_ON(!buffer_locked(tmp));
335 goto still_busy;
336 }
337 tmp = tmp->b_this_page;
338 } while (tmp != bh);
339 bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
340 local_irq_restore(flags);

	/*
	 * If none of the buffers had errors and they are all
	 * uptodate then we can set the page uptodate.
	 */
346 if (page_uptodate && !PageError(page))
347 SetPageUptodate(page);
348 unlock_page(page);
349 return;
350
351still_busy:
352 bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
353 local_irq_restore(flags);
354 return;
355}

/*
 * Completion handler for block_write_full_page() - pages which are unlocked
 * during I/O, and which have PageWriteback cleared upon I/O completion.
 */
361void end_buffer_async_write(struct buffer_head *bh, int uptodate)
362{
363 char b[BDEVNAME_SIZE];
364 unsigned long flags;
365 struct buffer_head *first;
366 struct buffer_head *tmp;
367 struct page *page;
368
369 BUG_ON(!buffer_async_write(bh));
370
371 page = bh->b_page;
372 if (uptodate) {
373 set_buffer_uptodate(bh);
374 } else {
375 if (!quiet_error(bh)) {
376 buffer_io_error(bh);
377 printk(KERN_WARNING "lost page write due to "
378 "I/O error on %s\n",
379 bdevname(bh->b_bdev, b));
380 }
381 set_bit(AS_EIO, &page->mapping->flags);
382 set_buffer_write_io_error(bh);
383 clear_buffer_uptodate(bh);
384 SetPageError(page);
385 }
386
387 first = page_buffers(page);
388 local_irq_save(flags);
389 bit_spin_lock(BH_Uptodate_Lock, &first->b_state);
390
391 clear_buffer_async_write(bh);
392 unlock_buffer(bh);
393 tmp = bh->b_this_page;
394 while (tmp != bh) {
395 if (buffer_async_write(tmp)) {
396 BUG_ON(!buffer_locked(tmp));
397 goto still_busy;
398 }
399 tmp = tmp->b_this_page;
400 }
401 bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
402 local_irq_restore(flags);
403 end_page_writeback(page);
404 return;
405
406still_busy:
407 bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
408 local_irq_restore(flags);
409 return;
410}
411EXPORT_SYMBOL(end_buffer_async_write);

/*
 * If a page's buffers are under async readin (end_buffer_async_read
 * completion) then there is a possibility that another thread of
 * control could lock one of the buffers after it has completed
 * but while some of the other buffers have not completed.  This
 * locked buffer would confuse end_buffer_async_read() into not unlocking
 * the page.  So the absence of BH_Async_Read tells end_buffer_async_read()
 * that this buffer is not under async I/O.
 *
 * The page comes unlocked when it has no locked buffer_async buffers
 * left.
 *
 * PageLocked prevents anyone from starting new async I/O reads of any
 * of the buffers.
 *
 * PageWriteback is used to prevent simultaneous writeout of the same
 * page.
 *
 * PageLocked prevents anyone from starting writeback of a page which is
 * under read I/O (PageWriteback is only ever set against a locked page).
 */
434static void mark_buffer_async_read(struct buffer_head *bh)
435{
436 bh->b_end_io = end_buffer_async_read;
437 set_buffer_async_read(bh);
438}
439
440static void mark_buffer_async_write_endio(struct buffer_head *bh,
441 bh_end_io_t *handler)
442{
443 bh->b_end_io = handler;
444 set_buffer_async_write(bh);
445}
446
447void mark_buffer_async_write(struct buffer_head *bh)
448{
449 mark_buffer_async_write_endio(bh, end_buffer_async_write);
450}
451EXPORT_SYMBOL(mark_buffer_async_write);

/*
 * Helpers for buffer-backed address_spaces' fsync implementations.
 *
 * A common requirement is that certain blocks of the backing block device
 * (for example ext2 indirect blocks) must be written and waited upon before
 * fsync() of a file returns.  To support this, a filesystem calls
 * mark_buffer_dirty_inode() against such a buffer, which puts it on
 * mapping->private_list (linked through b_assoc_buffers, with b_assoc_map
 * pointing back at the owning mapping).  mapping->private_data records the
 * blockdev mapping whose private_lock protects that list and also excludes
 * try_to_free_buffers().
 *
 * sync_mapping_buffers() -> fsync_buffers_list() writes out and waits upon
 * the listed buffers; osync_buffers_list() only waits for already-submitted
 * I/O.  Buffers drop off the list as they are cleaned or invalidated.
 */
506static void __remove_assoc_queue(struct buffer_head *bh)
507{
508 list_del_init(&bh->b_assoc_buffers);
509 WARN_ON(!bh->b_assoc_map);
510 if (buffer_write_io_error(bh))
511 set_bit(AS_EIO, &bh->b_assoc_map->flags);
512 bh->b_assoc_map = NULL;
513}
514
515int inode_has_buffers(struct inode *inode)
516{
517 return !list_empty(&inode->i_data.private_list);
518}

/*
 * osync is designed to support O_SYNC io.  It waits synchronously for
 * all already-submitted I/O against the listed buffers to complete, but
 * does not queue any new writes itself; the caller is expected to have
 * started the writes beforehand.  Returns -EIO if any of the waited-upon
 * buffers failed to become uptodate.
 */
530static int osync_buffers_list(spinlock_t *lock, struct list_head *list)
531{
532 struct buffer_head *bh;
533 struct list_head *p;
534 int err = 0;
535
536 spin_lock(lock);
537repeat:
538 list_for_each_prev(p, list) {
539 bh = BH_ENTRY(p);
540 if (buffer_locked(bh)) {
541 get_bh(bh);
542 spin_unlock(lock);
543 wait_on_buffer(bh);
544 if (!buffer_uptodate(bh))
545 err = -EIO;
546 brelse(bh);
547 spin_lock(lock);
548 goto repeat;
549 }
550 }
551 spin_unlock(lock);
552 return err;
553}
554
555static void do_thaw_one(struct super_block *sb, void *unused)
556{
557 char b[BDEVNAME_SIZE];
558 while (sb->s_bdev && !thaw_bdev(sb->s_bdev, sb))
559 printk(KERN_WARNING "Emergency Thaw on %s\n",
560 bdevname(sb->s_bdev, b));
561}
562
563static void do_thaw_all(struct work_struct *work)
564{
565 iterate_supers(do_thaw_one, NULL);
566 kfree(work);
567 printk(KERN_WARNING "Emergency Thaw complete\n");
568}
569
570
571
572
573
574
575void emergency_thaw_all(void)
576{
577 struct work_struct *work;
578
579 work = kmalloc(sizeof(*work), GFP_ATOMIC);
580 if (work) {
581 INIT_WORK(work, do_thaw_all);
582 schedule_work(work);
583 }
584}

/**
 * sync_mapping_buffers - write out & wait upon a mapping's "associated" buffers
 * @mapping: the mapping which wants those buffers written
 *
 * Starts I/O against the buffers at mapping->private_list, and waits upon
 * that I/O.
 *
 * Basically, this is a convenience function for fsync().
 * @mapping is a file or directory which needs those buffers to be written for
 * a successful fsync().
 */
597int sync_mapping_buffers(struct address_space *mapping)
598{
599 struct address_space *buffer_mapping = mapping->private_data;
600
601 if (buffer_mapping == NULL || list_empty(&mapping->private_list))
602 return 0;
603
604 return fsync_buffers_list(&buffer_mapping->private_lock,
605 &mapping->private_list);
606}
607EXPORT_SYMBOL(sync_mapping_buffers);
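
/*
 * Illustrative sketch, not part of the original file: a minimal ->fsync()
 * built on sync_mapping_buffers(), assuming the filesystem dirtied its
 * metadata buffers with mark_buffer_dirty_inode().  The "examplefs_" name
 * is hypothetical and the block is compiled out; a real implementation
 * would also write back the inode itself.
 */
#if 0
static int examplefs_fsync(struct file *file, loff_t start, loff_t end,
			   int datasync)
{
	struct inode *inode = file->f_mapping->host;
	int ret, err;

	ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
	err = sync_mapping_buffers(inode->i_mapping);
	if (!ret)
		ret = err;
	return ret;
}
#endif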

/*
 * Called when we've recently written block `bblock', and it is known that
 * `bblock' was for a buffer_boundary() buffer.  This means that the block at
 * `bblock + 1' is probably a dirty indirect block.  Hunt it down and, if it's
 * dirty, schedule it for I/O, so that indirects merge nicely with their data.
 */
615void write_boundary_block(struct block_device *bdev,
616 sector_t bblock, unsigned blocksize)
617{
618 struct buffer_head *bh = __find_get_block(bdev, bblock + 1, blocksize);
619 if (bh) {
620 if (buffer_dirty(bh))
621 ll_rw_block(WRITE, 1, &bh);
622 put_bh(bh);
623 }
624}
625
626void mark_buffer_dirty_inode(struct buffer_head *bh, struct inode *inode)
627{
628 struct address_space *mapping = inode->i_mapping;
629 struct address_space *buffer_mapping = bh->b_page->mapping;
630
631 mark_buffer_dirty(bh);
632 if (!mapping->private_data) {
633 mapping->private_data = buffer_mapping;
634 } else {
635 BUG_ON(mapping->private_data != buffer_mapping);
636 }
637 if (!bh->b_assoc_map) {
638 spin_lock(&buffer_mapping->private_lock);
639 list_move_tail(&bh->b_assoc_buffers,
640 &mapping->private_list);
641 bh->b_assoc_map = mapping;
642 spin_unlock(&buffer_mapping->private_lock);
643 }
644}
645EXPORT_SYMBOL(mark_buffer_dirty_inode);

/*
 * Mark the page dirty, tag it dirty in the radix tree, and mark the owning
 * inode I_DIRTY_PAGES.
 *
 * If warn is true, emit a warning if the page is not uptodate and has not
 * been truncated away from its mapping.
 */
654static void __set_page_dirty(struct page *page,
655 struct address_space *mapping, int warn)
656{
657 unsigned long flags;
658
659 spin_lock_irqsave(&mapping->tree_lock, flags);
660 if (page->mapping) {
661 WARN_ON_ONCE(warn && !PageUptodate(page));
662 account_page_dirtied(page, mapping);
663 radix_tree_tag_set(&mapping->page_tree,
664 page_index(page), PAGECACHE_TAG_DIRTY);
665 }
666 spin_unlock_irqrestore(&mapping->tree_lock, flags);
667 __mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
668}

/*
 * Add a page to the dirty page list.
 *
 * It is a sad fact of life that this function is called from several places
 * deeply under spinlocking.  It may not sleep.
 *
 * If the page has buffers, they are all marked dirty first, under the
 * mapping's private_lock (which excludes try_to_free_buffers()), and only
 * then is the page itself marked dirty, tagged in the radix tree and the
 * owning inode marked I_DIRTY_PAGES.  Dirtying the buffers before the page
 * keeps the buffer and page dirty state coherent for writeback, which
 * decides what to write by looking at the buffers.
 */
695int __set_page_dirty_buffers(struct page *page)
696{
697 int newly_dirty;
698 struct address_space *mapping = page_mapping(page);
699
700 if (unlikely(!mapping))
701 return !TestSetPageDirty(page);
702
703 spin_lock(&mapping->private_lock);
704 if (page_has_buffers(page)) {
705 struct buffer_head *head = page_buffers(page);
706 struct buffer_head *bh = head;
707
708 do {
709 set_buffer_dirty(bh);
710 bh = bh->b_this_page;
711 } while (bh != head);
712 }
713 newly_dirty = !TestSetPageDirty(page);
714 spin_unlock(&mapping->private_lock);
715
716 if (newly_dirty)
717 __set_page_dirty(page, mapping, 1);
718 return newly_dirty;
719}
720EXPORT_SYMBOL(__set_page_dirty_buffers);

/*
 * Write out and wait upon a list of buffers.
 *
 * We have conflicting pressures: we want to make sure that all initially
 * dirty buffers get waited on, but that any subsequently dirtied buffers
 * don't - otherwise fsync could last forever against a file which is being
 * actively written.
 *
 * Do this in two main stages: first we move dirty (or locked) buffers onto a
 * temporary list, starting writeback of the dirty ones under a plug; then we
 * walk the temporary list and wait upon the I/O.  Buffers which are found to
 * be dirty again while waiting are put back on the mapping's private_list so
 * that a later fsync will see them.
 */
741static int fsync_buffers_list(spinlock_t *lock, struct list_head *list)
742{
743 struct buffer_head *bh;
744 struct list_head tmp;
745 struct address_space *mapping;
746 int err = 0, err2;
747 struct blk_plug plug;
748
749 INIT_LIST_HEAD(&tmp);
750 blk_start_plug(&plug);
751
752 spin_lock(lock);
753 while (!list_empty(list)) {
754 bh = BH_ENTRY(list->next);
755 mapping = bh->b_assoc_map;
756 __remove_assoc_queue(bh);
757
758
759 smp_mb();
760 if (buffer_dirty(bh) || buffer_locked(bh)) {
761 list_add(&bh->b_assoc_buffers, &tmp);
762 bh->b_assoc_map = mapping;
763 if (buffer_dirty(bh)) {
764 get_bh(bh);
765 spin_unlock(lock);
766
767
768
769
770
771
772
773 write_dirty_buffer(bh, WRITE_SYNC);
774
775
776
777
778
779
780
781 brelse(bh);
782 spin_lock(lock);
783 }
784 }
785 }
786
787 spin_unlock(lock);
788 blk_finish_plug(&plug);
789 spin_lock(lock);
790
791 while (!list_empty(&tmp)) {
792 bh = BH_ENTRY(tmp.prev);
793 get_bh(bh);
794 mapping = bh->b_assoc_map;
795 __remove_assoc_queue(bh);
796
797
798 smp_mb();
799 if (buffer_dirty(bh)) {
800 list_add(&bh->b_assoc_buffers,
801 &mapping->private_list);
802 bh->b_assoc_map = mapping;
803 }
804 spin_unlock(lock);
805 wait_on_buffer(bh);
806 if (!buffer_uptodate(bh))
807 err = -EIO;
808 brelse(bh);
809 spin_lock(lock);
810 }
811
812 spin_unlock(lock);
813 err2 = osync_buffers_list(lock, list);
814 if (err)
815 return err;
816 else
817 return err2;
818}

/*
 * Invalidate any and all dirty buffers on a given inode.  We are
 * probably unmounting the fs, but that doesn't mean we have already
 * done a sync().  Just drop the buffers from the inode list.
 *
 * NOTE: we take the inode's blockdev's mapping's private_lock.  Which
 * assumes that all the buffers are against the blockdev.
 */
829void invalidate_inode_buffers(struct inode *inode)
830{
831 if (inode_has_buffers(inode)) {
832 struct address_space *mapping = &inode->i_data;
833 struct list_head *list = &mapping->private_list;
834 struct address_space *buffer_mapping = mapping->private_data;
835
836 spin_lock(&buffer_mapping->private_lock);
837 while (!list_empty(list))
838 __remove_assoc_queue(BH_ENTRY(list->next));
839 spin_unlock(&buffer_mapping->private_lock);
840 }
841}
842EXPORT_SYMBOL(invalidate_inode_buffers);

/*
 * Remove any clean buffers from the inode's buffer list.  This is called
 * when we're trying to free the inode itself.  Those buffers can pin it.
 *
 * Returns true if all buffers were removed.
 */
850int remove_inode_buffers(struct inode *inode)
851{
852 int ret = 1;
853
854 if (inode_has_buffers(inode)) {
855 struct address_space *mapping = &inode->i_data;
856 struct list_head *list = &mapping->private_list;
857 struct address_space *buffer_mapping = mapping->private_data;
858
859 spin_lock(&buffer_mapping->private_lock);
860 while (!list_empty(list)) {
861 struct buffer_head *bh = BH_ENTRY(list->next);
862 if (buffer_dirty(bh)) {
863 ret = 0;
864 break;
865 }
866 __remove_assoc_queue(bh);
867 }
868 spin_unlock(&buffer_mapping->private_lock);
869 }
870 return ret;
871}

/*
 * Create the appropriate buffers when given a page for data area and
 * the size of each buffer.  Use the bh->b_this_page linked list to
 * follow the buffers created.  Return NULL if unable to create more
 * buffers.
 *
 * The retry flag is used to differentiate async I/O (paging, swapping)
 * which may not fail from ordinary buffer allocations.
 */
882struct buffer_head *alloc_page_buffers(struct page *page, unsigned long size,
883 int retry)
884{
885 struct buffer_head *bh, *head;
886 long offset;
887
888try_again:
889 head = NULL;
890 offset = PAGE_SIZE;
891 while ((offset -= size) >= 0) {
892 bh = alloc_buffer_head(GFP_NOFS);
893 if (!bh)
894 goto no_grow;
895
896 bh->b_this_page = head;
897 bh->b_blocknr = -1;
898 head = bh;
899
900 bh->b_size = size;
901
902
903 set_bh_page(bh, page, offset);
904 }
905 return head;
906
907
908
909no_grow:
910 if (head) {
911 do {
912 bh = head;
913 head = head->b_this_page;
914 free_buffer_head(bh);
915 } while (head);
916 }
917
918
919
920
921
922
923
924 if (!retry)
925 return NULL;
926
927
928
929
930
931
932
933 free_more_memory();
934 goto try_again;
935}
936EXPORT_SYMBOL_GPL(alloc_page_buffers);
937
938static inline void
939link_dev_buffers(struct page *page, struct buffer_head *head)
940{
941 struct buffer_head *bh, *tail;
942
943 bh = head;
944 do {
945 tail = bh;
946 bh = bh->b_this_page;
947 } while (bh);
948 tail->b_this_page = head;
949 attach_page_buffers(page, head);
950}
951
952static sector_t blkdev_max_block(struct block_device *bdev, unsigned int size)
953{
954 sector_t retval = ~((sector_t)0);
955 loff_t sz = i_size_read(bdev->bd_inode);
956
957 if (sz) {
958 unsigned int sizebits = blksize_bits(size);
959 retval = (sz >> sizebits);
960 }
961 return retval;
962}
963
964
965
966
967static sector_t
968init_page_buffers(struct page *page, struct block_device *bdev,
969 sector_t block, int size)
970{
971 struct buffer_head *head = page_buffers(page);
972 struct buffer_head *bh = head;
973 int uptodate = PageUptodate(page);
974 sector_t end_block = blkdev_max_block(I_BDEV(bdev->bd_inode), size);
975
976 do {
977 if (!buffer_mapped(bh)) {
978 init_buffer(bh, NULL, NULL);
979 bh->b_bdev = bdev;
980 bh->b_blocknr = block;
981 if (uptodate)
982 set_buffer_uptodate(bh);
983 if (block < end_block)
984 set_buffer_mapped(bh);
985 }
986 block++;
987 bh = bh->b_this_page;
988 } while (bh != head);
989
990
991
992
993 return end_block;
994}

/*
 * Create the page-cache page that contains the requested block.
 *
 * This is used purely for blockdev mappings.
 */
1001static int
1002grow_dev_page(struct block_device *bdev, sector_t block,
1003 pgoff_t index, int size, int sizebits)
1004{
1005 struct inode *inode = bdev->bd_inode;
1006 struct page *page;
1007 struct buffer_head *bh;
1008 sector_t end_block;
1009 int ret = 0;
1010 gfp_t gfp_mask;
1011
1012 gfp_mask = mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS;
1013 gfp_mask |= __GFP_MOVABLE;
1014
1015
1016
1017
1018
1019
1020 gfp_mask |= __GFP_NOFAIL;
1021
1022 page = find_or_create_page(inode->i_mapping, index, gfp_mask);
1023 if (!page)
1024 return ret;
1025
1026 BUG_ON(!PageLocked(page));
1027
1028 if (page_has_buffers(page)) {
1029 bh = page_buffers(page);
1030 if (bh->b_size == size) {
1031 end_block = init_page_buffers(page, bdev,
1032 index << sizebits, size);
1033 goto done;
1034 }
1035 if (!try_to_free_buffers(page))
1036 goto failed;
1037 }
1038
1039
1040
1041
1042 bh = alloc_page_buffers(page, size, 0);
1043 if (!bh)
1044 goto failed;
1045
1046
1047
1048
1049
1050
1051 spin_lock(&inode->i_mapping->private_lock);
1052 link_dev_buffers(page, bh);
1053 end_block = init_page_buffers(page, bdev, index << sizebits, size);
1054 spin_unlock(&inode->i_mapping->private_lock);
1055done:
1056 ret = (block < end_block) ? 1 : -ENXIO;
1057failed:
1058 unlock_page(page);
1059 page_cache_release(page);
1060 return ret;
1061}

/*
 * Create buffers for the specified block device block's page.  If
 * that page was dirty, the buffers are set dirty also.
 */
1067static int
1068grow_buffers(struct block_device *bdev, sector_t block, int size)
1069{
1070 pgoff_t index;
1071 int sizebits;
1072
1073 sizebits = -1;
1074 do {
1075 sizebits++;
1076 } while ((size << sizebits) < PAGE_SIZE);
1077
1078 index = block >> sizebits;
1079
1080
1081
1082
1083
1084 if (unlikely(index != block >> sizebits)) {
1085 char b[BDEVNAME_SIZE];
1086
1087 printk(KERN_ERR "%s: requested out-of-range block %llu for "
1088 "device %s\n",
1089 __func__, (unsigned long long)block,
1090 bdevname(bdev, b));
1091 return -EIO;
1092 }
1093
1094
1095 return grow_dev_page(bdev, block, index, size, sizebits);
1096}
1097
1098static struct buffer_head *
1099__getblk_slow(struct block_device *bdev, sector_t block, int size)
1100{
1101
1102 if (unlikely(size & (bdev_logical_block_size(bdev)-1) ||
1103 (size < 512 || size > PAGE_SIZE))) {
1104 printk(KERN_ERR "getblk(): invalid block size %d requested\n",
1105 size);
1106 printk(KERN_ERR "logical block size: %d\n",
1107 bdev_logical_block_size(bdev));
1108
1109 dump_stack();
1110 return NULL;
1111 }
1112
1113 for (;;) {
1114 struct buffer_head *bh;
1115 int ret;
1116
1117 bh = __find_get_block(bdev, block, size);
1118 if (bh)
1119 return bh;
1120
1121 ret = grow_buffers(bdev, block, size);
1122 if (ret < 0)
1123 return NULL;
1124 if (ret == 0)
1125 free_more_memory();
1126 }
1127}

/*
 * The relationship between dirty buffers and dirty pages:
 *
 * Whenever a page has any dirty buffers, the page's dirty bit is set, and
 * the page is tagged dirty in its radix tree.
 *
 * At all times, the dirtiness of the buffers represents the dirtiness of
 * subsections of the page.  If the page has buffers, the page dirty bit is
 * merely a hint about the true dirty state.
 *
 * When a page is set dirty in its entirety, all its buffers become dirty
 * (if the page has buffers).
 *
 * When a buffer is marked dirty, its page is dirtied, but the page's other
 * buffers are not.
 *
 * Also: when blockdev buffers are explicitly read with bread(), they
 * individually become uptodate.  But their backing page remains not
 * uptodate - even if all of its buffers are uptodate.
 */

/**
 * mark_buffer_dirty - mark a buffer_head as needing writeout
 * @bh: the buffer_head to mark dirty
 *
 * mark_buffer_dirty() will set the dirty bit against the buffer, then set
 * its backing page dirty, then tag the page as dirty in its address_space's
 * radix tree and then attach the address_space's inode to its superblock's
 * dirty inode list.
 */
1164void mark_buffer_dirty(struct buffer_head *bh)
1165{
1166 WARN_ON_ONCE(!buffer_uptodate(bh));
1167
1168 trace_block_dirty_buffer(bh);

	/*
	 * Very *carefully* optimize the it-is-already-dirty case.
	 *
	 * Don't let the final "is it dirty" escape to before we
	 * perhaps modified the buffer.
	 */
1176 if (buffer_dirty(bh)) {
1177 smp_mb();
1178 if (buffer_dirty(bh))
1179 return;
1180 }
1181
1182 if (!test_set_buffer_dirty(bh)) {
1183 struct page *page = bh->b_page;
1184 if (!TestSetPageDirty(page)) {
1185 struct address_space *mapping = page_mapping(page);
1186 if (mapping)
1187 __set_page_dirty(page, mapping, 0);
1188 }
1189 }
1190}
1191EXPORT_SYMBOL(mark_buffer_dirty);

/*
 * Decrement a buffer_head's reference count.  If all buffers against a page
 * have zero reference count, are clean and unlocked, and if the page is clean
 * and unlocked then try_to_free_buffers() may strip the buffers from the page
 * in preparation for freeing it (sometimes, rarely, buffers are removed from
 * a page but it ends up not being freed, and buffers may later be reattached).
 */
1200void __brelse(struct buffer_head * buf)
1201{
1202 if (atomic_read(&buf->b_count)) {
1203 put_bh(buf);
1204 return;
1205 }
1206 WARN(1, KERN_ERR "VFS: brelse: Trying to free free buffer\n");
1207}
1208EXPORT_SYMBOL(__brelse);

/*
 * bforget() is like brelse(), except it discards any
 * potentially dirty data.
 */
1214void __bforget(struct buffer_head *bh)
1215{
1216 clear_buffer_dirty(bh);
1217 if (bh->b_assoc_map) {
1218 struct address_space *buffer_mapping = bh->b_page->mapping;
1219
1220 spin_lock(&buffer_mapping->private_lock);
1221 list_del_init(&bh->b_assoc_buffers);
1222 bh->b_assoc_map = NULL;
1223 spin_unlock(&buffer_mapping->private_lock);
1224 }
1225 __brelse(bh);
1226}
1227EXPORT_SYMBOL(__bforget);
1228
1229static struct buffer_head *__bread_slow(struct buffer_head *bh)
1230{
1231 lock_buffer(bh);
1232 if (buffer_uptodate(bh)) {
1233 unlock_buffer(bh);
1234 return bh;
1235 } else {
1236 get_bh(bh);
1237 bh->b_end_io = end_buffer_read_sync;
1238 submit_bh(READ, bh);
1239 wait_on_buffer(bh);
1240 if (buffer_uptodate(bh))
1241 return bh;
1242 }
1243 brelse(bh);
1244 return NULL;
1245}

/*
 * Per-cpu buffer LRU implementation.  To reduce the cost of __find_get_block().
 * The bhs[] array is sorted - newest buffer is at bhs[0].  Buffers have their
 * refcounts elevated by one when they're in an LRU.  A buffer can only appear
 * once in a particular CPU's LRU.  A single buffer can be present in multiple
 * CPUs' LRUs at the same time.
 *
 * This is a transparent caching front-end to sb_bread(), sb_getblk() and
 * sb_find_get_block().
 *
 * The LRUs themselves only need locking against invalidate_bh_lrus.  We use
 * a specialized bh locking scheme.
 */
1261#define BH_LRU_SIZE 8
1262
1263struct bh_lru {
1264 struct buffer_head *bhs[BH_LRU_SIZE];
1265};
1266
1267static DEFINE_PER_CPU(struct bh_lru, bh_lrus) = {{ NULL }};
1268
1269#ifdef CONFIG_SMP
1270#define bh_lru_lock() local_irq_disable()
1271#define bh_lru_unlock() local_irq_enable()
1272#else
1273#define bh_lru_lock() preempt_disable()
1274#define bh_lru_unlock() preempt_enable()
1275#endif
1276
1277static inline void check_irqs_on(void)
1278{
1279#ifdef irqs_disabled
1280 BUG_ON(irqs_disabled());
1281#endif
1282}
1283
1284
1285
1286
1287static void bh_lru_install(struct buffer_head *bh)
1288{
1289 struct buffer_head *evictee = NULL;
1290
1291 check_irqs_on();
1292 bh_lru_lock();
1293 if (__this_cpu_read(bh_lrus.bhs[0]) != bh) {
1294 struct buffer_head *bhs[BH_LRU_SIZE];
1295 int in;
1296 int out = 0;
1297
1298 get_bh(bh);
1299 bhs[out++] = bh;
1300 for (in = 0; in < BH_LRU_SIZE; in++) {
1301 struct buffer_head *bh2 =
1302 __this_cpu_read(bh_lrus.bhs[in]);
1303
1304 if (bh2 == bh) {
1305 __brelse(bh2);
1306 } else {
1307 if (out >= BH_LRU_SIZE) {
1308 BUG_ON(evictee != NULL);
1309 evictee = bh2;
1310 } else {
1311 bhs[out++] = bh2;
1312 }
1313 }
1314 }
1315 while (out < BH_LRU_SIZE)
1316 bhs[out++] = NULL;
1317 memcpy(this_cpu_ptr(&bh_lrus.bhs), bhs, sizeof(bhs));
1318 }
1319 bh_lru_unlock();
1320
1321 if (evictee)
1322 __brelse(evictee);
1323}
1324
1325
1326
1327
1328static struct buffer_head *
1329lookup_bh_lru(struct block_device *bdev, sector_t block, unsigned size)
1330{
1331 struct buffer_head *ret = NULL;
1332 unsigned int i;
1333
1334 check_irqs_on();
1335 bh_lru_lock();
1336 for (i = 0; i < BH_LRU_SIZE; i++) {
1337 struct buffer_head *bh = __this_cpu_read(bh_lrus.bhs[i]);
1338
1339 if (bh && bh->b_bdev == bdev &&
1340 bh->b_blocknr == block && bh->b_size == size) {
1341 if (i) {
1342 while (i) {
1343 __this_cpu_write(bh_lrus.bhs[i],
1344 __this_cpu_read(bh_lrus.bhs[i - 1]));
1345 i--;
1346 }
1347 __this_cpu_write(bh_lrus.bhs[0], bh);
1348 }
1349 get_bh(bh);
1350 ret = bh;
1351 break;
1352 }
1353 }
1354 bh_lru_unlock();
1355 return ret;
1356}

/*
 * Find the buffer for the given block: first try the per-cpu LRU, and on a
 * miss fall back to a pagecache lookup, installing the result in the LRU.
 * Returns the buffer with an elevated refcount, or NULL if it is not present.
 */
1363struct buffer_head *
1364__find_get_block(struct block_device *bdev, sector_t block, unsigned size)
1365{
1366 struct buffer_head *bh = lookup_bh_lru(bdev, block, size);
1367
1368 if (bh == NULL) {
1369
1370 bh = __find_get_block_slow(bdev, block);
1371 if (bh)
1372 bh_lru_install(bh);
1373 } else
1374 touch_buffer(bh);
1375
1376 return bh;
1377}
1378EXPORT_SYMBOL(__find_get_block);

/*
 * __getblk will locate (and, if necessary, create) the buffer_head
 * which corresponds to the passed block_device, block and size.  The
 * returned buffer has its reference count incremented, but is not
 * necessarily uptodate.
 */
1388struct buffer_head *
1389__getblk(struct block_device *bdev, sector_t block, unsigned size)
1390{
1391 struct buffer_head *bh = __find_get_block(bdev, block, size);
1392
1393 might_sleep();
1394 if (bh == NULL)
1395 bh = __getblk_slow(bdev, block, size);
1396 return bh;
1397}
1398EXPORT_SYMBOL(__getblk);
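
/*
 * Illustrative sketch, not part of the original file: the typical
 * __getblk() lifecycle for overwriting a whole block without reading it
 * first.  The function name is hypothetical, the block is compiled out,
 * and a lowmem buffer is assumed so b_data can be used directly.
 */
#if 0
static int example_rewrite_block(struct block_device *bdev, sector_t block,
				 unsigned size)
{
	struct buffer_head *bh;

	bh = __getblk(bdev, block, size);
	if (!bh)
		return -ENOMEM;
	lock_buffer(bh);
	memset(bh->b_data, 0, bh->b_size);	/* fill in the new contents */
	set_buffer_uptodate(bh);
	unlock_buffer(bh);
	mark_buffer_dirty(bh);
	sync_dirty_buffer(bh);			/* optional: write it out now */
	brelse(bh);
	return 0;
}
#endif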

/*
 * Do async read-ahead on a buffer.
 */
1403void __breadahead(struct block_device *bdev, sector_t block, unsigned size)
1404{
1405 struct buffer_head *bh = __getblk(bdev, block, size);
1406 if (likely(bh)) {
1407 ll_rw_block(READA, 1, &bh);
1408 brelse(bh);
1409 }
1410}
1411EXPORT_SYMBOL(__breadahead);

/**
 *  __bread() - reads a specified block and returns the bh
 *  @bdev: the block_device to read from
 *  @block: number of block
 *  @size: size (in bytes) to read
 *
 *  Reads a specified block, and returns the buffer head that contains it.
 *  It returns NULL if the block was unreadable.
 */
1422struct buffer_head *
1423__bread(struct block_device *bdev, sector_t block, unsigned size)
1424{
1425 struct buffer_head *bh = __getblk(bdev, block, size);
1426
1427 if (likely(bh) && !buffer_uptodate(bh))
1428 bh = __bread_slow(bh);
1429 return bh;
1430}
1431EXPORT_SYMBOL(__bread);
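
/*
 * Illustrative sketch, not part of the original file: synchronous metadata
 * read via __bread().  Filesystems usually go through the sb_bread()
 * wrapper, which supplies sb->s_bdev and sb->s_blocksize.  The function
 * name is hypothetical and the block is compiled out.
 */
#if 0
static int example_read_block(struct block_device *bdev, sector_t block,
			      unsigned size)
{
	struct buffer_head *bh;

	bh = __bread(bdev, block, size);
	if (!bh)
		return -EIO;		/* the block could not be read */
	/* ... examine bh->b_data ... */
	brelse(bh);
	return 0;
}
#endif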

/*
 * invalidate_bh_lrus() is called rarely - but not only at unmount.  It does
 * not race with the lookup/install paths because the per-cpu work runs with
 * interrupts (or preemption) disabled on each cpu that has cached buffers.
 */
1438static void invalidate_bh_lru(void *arg)
1439{
1440 struct bh_lru *b = &get_cpu_var(bh_lrus);
1441 int i;
1442
1443 for (i = 0; i < BH_LRU_SIZE; i++) {
1444 brelse(b->bhs[i]);
1445 b->bhs[i] = NULL;
1446 }
1447 put_cpu_var(bh_lrus);
1448}
1449
1450static bool has_bh_in_lru(int cpu, void *dummy)
1451{
1452 struct bh_lru *b = per_cpu_ptr(&bh_lrus, cpu);
1453 int i;
1454
1455 for (i = 0; i < BH_LRU_SIZE; i++) {
1456 if (b->bhs[i])
1457 return 1;
1458 }
1459
1460 return 0;
1461}
1462
1463void invalidate_bh_lrus(void)
1464{
1465 on_each_cpu_cond(has_bh_in_lru, invalidate_bh_lru, NULL, 1, GFP_KERNEL);
1466}
1467EXPORT_SYMBOL_GPL(invalidate_bh_lrus);
1468
1469void set_bh_page(struct buffer_head *bh,
1470 struct page *page, unsigned long offset)
1471{
1472 bh->b_page = page;
1473 BUG_ON(offset >= PAGE_SIZE);
1474 if (PageHighMem(page))
1475
1476
1477
1478 bh->b_data = (char *)(0 + offset);
1479 else
1480 bh->b_data = page_address(page) + offset;
1481}
1482EXPORT_SYMBOL(set_bh_page);

/*
 * Called when truncating a buffer on a page completely.
 */

/* Bits that are cleared during an invalidate */
1489#define BUFFER_FLAGS_DISCARD \
1490 (1 << BH_Mapped | 1 << BH_New | 1 << BH_Req | \
1491 1 << BH_Delay | 1 << BH_Unwritten)
1492
1493static void discard_buffer(struct buffer_head * bh)
1494{
1495 unsigned long b_state, b_state_old;
1496
1497 lock_buffer(bh);
1498 clear_buffer_dirty(bh);
1499 bh->b_bdev = NULL;
1500 b_state = bh->b_state;
1501 for (;;) {
1502 b_state_old = cmpxchg(&bh->b_state, b_state,
1503 (b_state & ~BUFFER_FLAGS_DISCARD));
1504 if (b_state_old == b_state)
1505 break;
1506 b_state = b_state_old;
1507 }
1508 unlock_buffer(bh);
1509}

/**
 * block_invalidatepage - invalidate part or all of a buffer-backed page
 * @page: the page which is affected
 * @offset: start of the range to invalidate
 * @length: length of the range to invalidate
 *
 * block_invalidatepage() is called when all or part of the page has become
 * invalidated by a truncate operation.
 *
 * block_invalidatepage() does not have to release all buffers, but it must
 * ensure that no dirty buffer is left outside @offset and that no I/O
 * is underway against any of the blocks which are outside the truncation
 * point, because the caller is about to free (and possibly reuse) those
 * blocks on-disk.
 */
1527void block_invalidatepage(struct page *page, unsigned int offset,
1528 unsigned int length)
1529{
1530 struct buffer_head *head, *bh, *next;
1531 unsigned int curr_off = 0;
1532 unsigned int stop = length + offset;
1533
1534 BUG_ON(!PageLocked(page));
1535 if (!page_has_buffers(page))
1536 goto out;
1537
1538
1539
1540
1541 BUG_ON(stop > PAGE_CACHE_SIZE || stop < length);
1542
1543 head = page_buffers(page);
1544 bh = head;
1545 do {
1546 unsigned int next_off = curr_off + bh->b_size;
1547 next = bh->b_this_page;
1548
1549
1550
1551
1552 if (next_off > stop)
1553 goto out;
1554
1555
1556
1557
1558 if (offset <= curr_off)
1559 discard_buffer(bh);
1560 curr_off = next_off;
1561 bh = next;
1562 } while (bh != head);
1563
1564
1565
1566
1567
1568
1569 if (offset == 0)
1570 try_to_release_page(page, 0);
1571out:
1572 return;
1573}
1574EXPORT_SYMBOL(block_invalidatepage);

/*
 * We attach and possibly dirty the buffers atomically wrt
 * __set_page_dirty_buffers() via private_lock.  try_to_free_buffers
 * is already excluded via the page lock.
 */
1582void create_empty_buffers(struct page *page,
1583 unsigned long blocksize, unsigned long b_state)
1584{
1585 struct buffer_head *bh, *head, *tail;
1586
1587 head = alloc_page_buffers(page, blocksize, 1);
1588 bh = head;
1589 do {
1590 bh->b_state |= b_state;
1591 tail = bh;
1592 bh = bh->b_this_page;
1593 } while (bh);
1594 tail->b_this_page = head;
1595
1596 spin_lock(&page->mapping->private_lock);
1597 if (PageUptodate(page) || PageDirty(page)) {
1598 bh = head;
1599 do {
1600 if (PageDirty(page))
1601 set_buffer_dirty(bh);
1602 if (PageUptodate(page))
1603 set_buffer_uptodate(bh);
1604 bh = bh->b_this_page;
1605 } while (bh != head);
1606 }
1607 attach_page_buffers(page, head);
1608 spin_unlock(&page->mapping->private_lock);
1609}
1610EXPORT_SYMBOL(create_empty_buffers);

/*
 * We are taking a block for data and we don't want any output from any
 * buffer-cache alias of it starting from return from this function and
 * until the moment when something will explicitly mark the buffer dirty
 * (hopefully that will not happen until we will free that block ;-)
 * We don't even need to mark it not-uptodate - nobody can expect
 * anything from a newly allocated buffer anyway.
 *
 * Also note that bforget() doesn't lock the buffer, so there can be
 * writeout I/O going on against recently-freed buffers.  We don't wait
 * on that I/O in bforget() - it's more efficient to wait on the I/O only
 * if we really need the block in question, which this routine does.
 */
1628void unmap_underlying_metadata(struct block_device *bdev, sector_t block)
1629{
1630 struct buffer_head *old_bh;
1631
1632 might_sleep();
1633
1634 old_bh = __find_get_block_slow(bdev, block);
1635 if (old_bh) {
1636 clear_buffer_dirty(old_bh);
1637 wait_on_buffer(old_bh);
1638 clear_buffer_req(old_bh);
1639 __brelse(old_bh);
1640 }
1641}
1642EXPORT_SYMBOL(unmap_underlying_metadata);

/*
 * Size is a power-of-two in the range 512..PAGE_SIZE, and the case we
 * care about most is PAGE_SIZE; ilog2() handles all of them.
 */
1652static inline int block_size_bits(unsigned int blocksize)
1653{
1654 return ilog2(blocksize);
1655}
1656
1657static struct buffer_head *create_page_buffers(struct page *page, struct inode *inode, unsigned int b_state)
1658{
1659 BUG_ON(!PageLocked(page));
1660
1661 if (!page_has_buffers(page))
1662 create_empty_buffers(page, 1 << ACCESS_ONCE(inode->i_blkbits), b_state);
1663 return page_buffers(page);
1664}

/*
 * NOTE! All mapped/uptodate combinations are valid:
 *
 *	Mapped	Uptodate	Meaning
 *
 *	No	No		"unknown" - must do get_block()
 *	No	Yes		"hole" - zero-filled
 *	Yes	No		"allocated" - allocated on disk, not read in
 *	Yes	Yes		"valid" - allocated and up-to-date in memory.
 *
 * "Dirty" is valid only with the last case (mapped+uptodate).
 */

/*
 * While block_write_full_page is writing back the dirty buffers under
 * the page lock, whoever dirtied the buffers may decide to clean them
 * again at any time.  We handle that by only looking at the buffer
 * state inside lock_buffer().
 *
 * If block_write_full_page() is called for regular writeback
 * (wbc->sync_mode == WB_SYNC_NONE) then it will redirty a page which has a
 * locked buffer.  That can only happen if someone has written the buffer
 * directly, with submit_bh(); at the address_space level PageWriteback
 * prevents this contention from occurring.
 *
 * If block_write_full_page() is called with wbc->sync_mode == WB_SYNC_ALL,
 * the writes are posted using WRITE_SYNC, flagging them as synchronous
 * writes.
 */
1695static int __block_write_full_page(struct inode *inode, struct page *page,
1696 get_block_t *get_block, struct writeback_control *wbc,
1697 bh_end_io_t *handler)
1698{
1699 int err;
1700 sector_t block;
1701 sector_t last_block;
1702 struct buffer_head *bh, *head;
1703 unsigned int blocksize, bbits;
1704 int nr_underway = 0;
1705 int write_op = (wbc->sync_mode == WB_SYNC_ALL ?
1706 WRITE_SYNC : WRITE);
1707
1708 head = create_page_buffers(page, inode,
1709 (1 << BH_Dirty)|(1 << BH_Uptodate));
1710
1711
1712
1713
1714
1715
1716
1717
1718
1719
1720
1721 bh = head;
1722 blocksize = bh->b_size;
1723 bbits = block_size_bits(blocksize);
1724
1725 block = (sector_t)page->index << (PAGE_CACHE_SHIFT - bbits);
1726 last_block = (i_size_read(inode) - 1) >> bbits;
1727
1728
1729
1730
1731
1732 do {
1733 if (block > last_block) {
1734
1735
1736
1737
1738
1739
1740
1741
1742 clear_buffer_dirty(bh);
1743 set_buffer_uptodate(bh);
1744 } else if ((!buffer_mapped(bh) || buffer_delay(bh)) &&
1745 buffer_dirty(bh)) {
1746 WARN_ON(bh->b_size != blocksize);
1747 err = get_block(inode, block, bh, 1);
1748 if (err)
1749 goto recover;
1750 clear_buffer_delay(bh);
1751 if (buffer_new(bh)) {
1752
1753 clear_buffer_new(bh);
1754 unmap_underlying_metadata(bh->b_bdev,
1755 bh->b_blocknr);
1756 }
1757 }
1758 bh = bh->b_this_page;
1759 block++;
1760 } while (bh != head);
1761
1762 do {
1763 if (!buffer_mapped(bh))
1764 continue;
1765
1766
1767
1768
1769
1770
1771
1772 if (wbc->sync_mode != WB_SYNC_NONE) {
1773 lock_buffer(bh);
1774 } else if (!trylock_buffer(bh)) {
1775 redirty_page_for_writepage(wbc, page);
1776 continue;
1777 }
1778 if (test_clear_buffer_dirty(bh)) {
1779 mark_buffer_async_write_endio(bh, handler);
1780 } else {
1781 unlock_buffer(bh);
1782 }
1783 } while ((bh = bh->b_this_page) != head);
1784
1785
1786
1787
1788
1789 BUG_ON(PageWriteback(page));
1790 set_page_writeback(page);
1791
1792 do {
1793 struct buffer_head *next = bh->b_this_page;
1794 if (buffer_async_write(bh)) {
1795 submit_bh(write_op, bh);
1796 nr_underway++;
1797 }
1798 bh = next;
1799 } while (bh != head);
1800 unlock_page(page);
1801
1802 err = 0;
1803done:
1804 if (nr_underway == 0) {
1805
1806
1807
1808
1809
1810 end_page_writeback(page);
1811
1812
1813
1814
1815
1816 }
1817 return err;
1818
1819recover:
1820
1821
1822
1823
1824
1825
1826 bh = head;
1827
1828 do {
1829 if (buffer_mapped(bh) && buffer_dirty(bh) &&
1830 !buffer_delay(bh)) {
1831 lock_buffer(bh);
1832 mark_buffer_async_write_endio(bh, handler);
1833 } else {
1834
1835
1836
1837
1838 clear_buffer_dirty(bh);
1839 }
1840 } while ((bh = bh->b_this_page) != head);
1841 SetPageError(page);
1842 BUG_ON(PageWriteback(page));
1843 mapping_set_error(page->mapping, err);
1844 set_page_writeback(page);
1845 do {
1846 struct buffer_head *next = bh->b_this_page;
1847 if (buffer_async_write(bh)) {
1848 clear_buffer_dirty(bh);
1849 submit_bh(write_op, bh);
1850 nr_underway++;
1851 }
1852 bh = next;
1853 } while (bh != head);
1854 unlock_page(page);
1855 goto done;
1856}

/*
 * If a page has any new buffers, zero them out here, and mark them uptodate
 * and dirty so they'll be written out (in order to prevent uninitialised
 * block data from leaking).  And clear the new bit.
 */
1863void page_zero_new_buffers(struct page *page, unsigned from, unsigned to)
1864{
1865 unsigned int block_start, block_end;
1866 struct buffer_head *head, *bh;
1867
1868 BUG_ON(!PageLocked(page));
1869 if (!page_has_buffers(page))
1870 return;
1871
1872 bh = head = page_buffers(page);
1873 block_start = 0;
1874 do {
1875 block_end = block_start + bh->b_size;
1876
1877 if (buffer_new(bh)) {
1878 if (block_end > from && block_start < to) {
1879 if (!PageUptodate(page)) {
1880 unsigned start, size;
1881
1882 start = max(from, block_start);
1883 size = min(to, block_end) - start;
1884
1885 zero_user(page, start, size);
1886 set_buffer_uptodate(bh);
1887 }
1888
1889 clear_buffer_new(bh);
1890 mark_buffer_dirty(bh);
1891 }
1892 }
1893
1894 block_start = block_end;
1895 bh = bh->b_this_page;
1896 } while (bh != head);
1897}
1898EXPORT_SYMBOL(page_zero_new_buffers);
1899
1900int __block_write_begin(struct page *page, loff_t pos, unsigned len,
1901 get_block_t *get_block)
1902{
1903 unsigned from = pos & (PAGE_CACHE_SIZE - 1);
1904 unsigned to = from + len;
1905 struct inode *inode = page->mapping->host;
1906 unsigned block_start, block_end;
1907 sector_t block;
1908 int err = 0;
1909 unsigned blocksize, bbits;
1910 struct buffer_head *bh, *head, *wait[2], **wait_bh=wait;
1911
1912 BUG_ON(!PageLocked(page));
1913 BUG_ON(from > PAGE_CACHE_SIZE);
1914 BUG_ON(to > PAGE_CACHE_SIZE);
1915 BUG_ON(from > to);
1916
1917 head = create_page_buffers(page, inode, 0);
1918 blocksize = head->b_size;
1919 bbits = block_size_bits(blocksize);
1920
1921 block = (sector_t)page->index << (PAGE_CACHE_SHIFT - bbits);
1922
1923 for(bh = head, block_start = 0; bh != head || !block_start;
1924 block++, block_start=block_end, bh = bh->b_this_page) {
1925 block_end = block_start + blocksize;
1926 if (block_end <= from || block_start >= to) {
1927 if (PageUptodate(page)) {
1928 if (!buffer_uptodate(bh))
1929 set_buffer_uptodate(bh);
1930 }
1931 continue;
1932 }
1933 if (buffer_new(bh))
1934 clear_buffer_new(bh);
1935 if (!buffer_mapped(bh)) {
1936 WARN_ON(bh->b_size != blocksize);
1937 err = get_block(inode, block, bh, 1);
1938 if (err)
1939 break;
1940 if (buffer_new(bh)) {
1941 unmap_underlying_metadata(bh->b_bdev,
1942 bh->b_blocknr);
1943 if (PageUptodate(page)) {
1944 clear_buffer_new(bh);
1945 set_buffer_uptodate(bh);
1946 mark_buffer_dirty(bh);
1947 continue;
1948 }
1949 if (block_end > to || block_start < from)
1950 zero_user_segments(page,
1951 to, block_end,
1952 block_start, from);
1953 continue;
1954 }
1955 }
1956 if (PageUptodate(page)) {
1957 if (!buffer_uptodate(bh))
1958 set_buffer_uptodate(bh);
1959 continue;
1960 }
1961 if (!buffer_uptodate(bh) && !buffer_delay(bh) &&
1962 !buffer_unwritten(bh) &&
1963 (block_start < from || block_end > to)) {
1964 ll_rw_block(READ, 1, &bh);
1965 *wait_bh++=bh;
1966 }
1967 }
1968
1969
1970
1971 while(wait_bh > wait) {
1972 wait_on_buffer(*--wait_bh);
1973 if (!buffer_uptodate(*wait_bh))
1974 err = -EIO;
1975 }
1976 if (unlikely(err))
1977 page_zero_new_buffers(page, from, to);
1978 return err;
1979}
1980EXPORT_SYMBOL(__block_write_begin);
1981
1982static int __block_commit_write(struct inode *inode, struct page *page,
1983 unsigned from, unsigned to)
1984{
1985 unsigned block_start, block_end;
1986 int partial = 0;
1987 unsigned blocksize;
1988 struct buffer_head *bh, *head;
1989
1990 bh = head = page_buffers(page);
1991 blocksize = bh->b_size;
1992
1993 block_start = 0;
1994 do {
1995 block_end = block_start + blocksize;
1996 if (block_end <= from || block_start >= to) {
1997 if (!buffer_uptodate(bh))
1998 partial = 1;
1999 } else {
2000 set_buffer_uptodate(bh);
2001 mark_buffer_dirty(bh);
2002 }
2003 clear_buffer_new(bh);
2004
2005 block_start = block_end;
2006 bh = bh->b_this_page;
2007 } while (bh != head);
2008
2009
2010
2011
2012
2013
2014
2015 if (!partial)
2016 SetPageUptodate(page);
2017 return 0;
2018}

/*
 * block_write_begin takes care of the basic task of block allocation and
 * bringing partial write blocks uptodate first.
 *
 * The filesystem needs to handle block truncation upon failure.
 */
2026int block_write_begin(struct address_space *mapping, loff_t pos, unsigned len,
2027 unsigned flags, struct page **pagep, get_block_t *get_block)
2028{
2029 pgoff_t index = pos >> PAGE_CACHE_SHIFT;
2030 struct page *page;
2031 int status;
2032
2033 page = grab_cache_page_write_begin(mapping, index, flags);
2034 if (!page)
2035 return -ENOMEM;
2036
2037 status = __block_write_begin(page, pos, len, get_block);
2038 if (unlikely(status)) {
2039 unlock_page(page);
2040 page_cache_release(page);
2041 page = NULL;
2042 }
2043
2044 *pagep = page;
2045 return status;
2046}
2047EXPORT_SYMBOL(block_write_begin);
2048
2049int block_write_end(struct file *file, struct address_space *mapping,
2050 loff_t pos, unsigned len, unsigned copied,
2051 struct page *page, void *fsdata)
2052{
2053 struct inode *inode = mapping->host;
2054 unsigned start;
2055
2056 start = pos & (PAGE_CACHE_SIZE - 1);
2057
2058 if (unlikely(copied < len)) {
2059
2060
2061
2062
2063
2064
2065
2066
2067
2068
2069
2070
2071 if (!PageUptodate(page))
2072 copied = 0;
2073
2074 page_zero_new_buffers(page, start+copied, start+len);
2075 }
2076 flush_dcache_page(page);
2077
2078
2079 __block_commit_write(inode, page, start, start+copied);
2080
2081 return copied;
2082}
2083EXPORT_SYMBOL(block_write_end);
2084
2085int generic_write_end(struct file *file, struct address_space *mapping,
2086 loff_t pos, unsigned len, unsigned copied,
2087 struct page *page, void *fsdata)
2088{
2089 struct inode *inode = mapping->host;
2090 int i_size_changed = 0;
2091
2092 copied = block_write_end(file, mapping, pos, len, copied, page, fsdata);
2093
2094
2095
2096
2097
2098
2099
2100
2101 if (pos+copied > inode->i_size) {
2102 i_size_write(inode, pos+copied);
2103 i_size_changed = 1;
2104 }
2105
2106 unlock_page(page);
2107 page_cache_release(page);
2108
2109
2110
2111
2112
2113
2114
2115 if (i_size_changed)
2116 mark_inode_dirty(inode);
2117
2118 return copied;
2119}
2120EXPORT_SYMBOL(generic_write_end);
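
/*
 * Illustrative sketch, not part of the original file: how a filesystem
 * typically wires block_write_begin()/generic_write_end() into its
 * address_space_operations, passing its own get_block routine.  All
 * "examplefs_" names are hypothetical and the block is compiled out.
 */
#if 0
static int examplefs_get_block(struct inode *inode, sector_t iblock,
			       struct buffer_head *bh_result, int create);

static int examplefs_write_begin(struct file *file,
				 struct address_space *mapping,
				 loff_t pos, unsigned len, unsigned flags,
				 struct page **pagep, void **fsdata)
{
	return block_write_begin(mapping, pos, len, flags, pagep,
				 examplefs_get_block);
}

static const struct address_space_operations examplefs_aops = {
	.write_begin	= examplefs_write_begin,
	.write_end	= generic_write_end,
};
#endif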

/*
 * block_is_partially_uptodate checks whether buffers within a page are
 * uptodate or not.
 *
 * Returns true if all buffers which correspond to the specified part
 * of the page are uptodate.
 */
2129int block_is_partially_uptodate(struct page *page, unsigned long from,
2130 unsigned long count)
2131{
2132 unsigned block_start, block_end, blocksize;
2133 unsigned to;
2134 struct buffer_head *bh, *head;
2135 int ret = 1;
2136
2137 if (!page_has_buffers(page))
2138 return 0;
2139
2140 head = page_buffers(page);
2141 blocksize = head->b_size;
2142 to = min_t(unsigned, PAGE_CACHE_SIZE - from, count);
2143 to = from + to;
2144 if (from < blocksize && to > PAGE_CACHE_SIZE - blocksize)
2145 return 0;
2146
2147 bh = head;
2148 block_start = 0;
2149 do {
2150 block_end = block_start + blocksize;
2151 if (block_end > from && block_start < to) {
2152 if (!buffer_uptodate(bh)) {
2153 ret = 0;
2154 break;
2155 }
2156 if (block_end >= to)
2157 break;
2158 }
2159 block_start = block_end;
2160 bh = bh->b_this_page;
2161 } while (bh != head);
2162
2163 return ret;
2164}
2165EXPORT_SYMBOL(block_is_partially_uptodate);

/*
 * Generic "read page" function for block devices that have the normal
 * get_block functionality.  This is most of the block device filesystems.
 * Reads the page asynchronously - the unlock_buffer() and
 * set/clear_buffer_uptodate() functions propagate buffer state into the
 * page struct once I/O is complete.
 */
2174int block_read_full_page(struct page *page, get_block_t *get_block)
2175{
2176 struct inode *inode = page->mapping->host;
2177 sector_t iblock, lblock;
2178 struct buffer_head *bh, *head, *arr[MAX_BUF_PER_PAGE];
2179 unsigned int blocksize, bbits;
2180 int nr, i;
2181 int fully_mapped = 1;
2182
2183 head = create_page_buffers(page, inode, 0);
2184 blocksize = head->b_size;
2185 bbits = block_size_bits(blocksize);
2186
2187 iblock = (sector_t)page->index << (PAGE_CACHE_SHIFT - bbits);
2188 lblock = (i_size_read(inode)+blocksize-1) >> bbits;
2189 bh = head;
2190 nr = 0;
2191 i = 0;
2192
2193 do {
2194 if (buffer_uptodate(bh))
2195 continue;
2196
2197 if (!buffer_mapped(bh)) {
2198 int err = 0;
2199
2200 fully_mapped = 0;
2201 if (iblock < lblock) {
2202 WARN_ON(bh->b_size != blocksize);
2203 err = get_block(inode, iblock, bh, 0);
2204 if (err)
2205 SetPageError(page);
2206 }
2207 if (!buffer_mapped(bh)) {
2208 zero_user(page, i * blocksize, blocksize);
2209 if (!err)
2210 set_buffer_uptodate(bh);
2211 continue;
2212 }
2213
2214
2215
2216
2217 if (buffer_uptodate(bh))
2218 continue;
2219 }
2220 arr[nr++] = bh;
2221 } while (i++, iblock++, (bh = bh->b_this_page) != head);
2222
2223 if (fully_mapped)
2224 SetPageMappedToDisk(page);
2225
2226 if (!nr) {
2227
2228
2229
2230
2231 if (!PageError(page))
2232 SetPageUptodate(page);
2233 unlock_page(page);
2234 return 0;
2235 }
2236
2237
2238 for (i = 0; i < nr; i++) {
2239 bh = arr[i];
2240 lock_buffer(bh);
2241 mark_buffer_async_read(bh);
2242 }
2243
2244
2245
2246
2247
2248
2249 for (i = 0; i < nr; i++) {
2250 bh = arr[i];
2251 if (buffer_uptodate(bh))
2252 end_buffer_async_read(bh, 1);
2253 else
2254 submit_bh(READ, bh);
2255 }
2256 return 0;
2257}
2258EXPORT_SYMBOL(block_read_full_page);
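
/*
 * Illustrative sketch, not part of the original file: ->readpage is
 * usually just a thin wrapper that hands the filesystem's get_block
 * routine to block_read_full_page().  The "examplefs_" names are
 * hypothetical and the block is compiled out.
 */
#if 0
static int examplefs_get_block(struct inode *inode, sector_t iblock,
			       struct buffer_head *bh_result, int create);

static int examplefs_readpage(struct file *file, struct page *page)
{
	return block_read_full_page(page, examplefs_get_block);
}
#endif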

/*
 * Utility function for filesystems that need to do work on expanding
 * truncates.  Uses pagecache writes to allow the filesystem to deal with
 * the hole.
 */
2264int generic_cont_expand_simple(struct inode *inode, loff_t size)
2265{
2266 struct address_space *mapping = inode->i_mapping;
2267 struct page *page;
2268 void *fsdata;
2269 int err;
2270
2271 err = inode_newsize_ok(inode, size);
2272 if (err)
2273 goto out;
2274
2275 err = pagecache_write_begin(NULL, mapping, size, 0,
2276 AOP_FLAG_UNINTERRUPTIBLE|AOP_FLAG_CONT_EXPAND,
2277 &page, &fsdata);
2278 if (err)
2279 goto out;
2280
2281 err = pagecache_write_end(NULL, mapping, size, 0, 0, page, fsdata);
2282 BUG_ON(err > 0);
2283
2284out:
2285 return err;
2286}
2287EXPORT_SYMBOL(generic_cont_expand_simple);
2288
2289static int cont_expand_zero(struct file *file, struct address_space *mapping,
2290 loff_t pos, loff_t *bytes)
2291{
2292 struct inode *inode = mapping->host;
2293 unsigned blocksize = 1 << inode->i_blkbits;
2294 struct page *page;
2295 void *fsdata;
2296 pgoff_t index, curidx;
2297 loff_t curpos;
2298 unsigned zerofrom, offset, len;
2299 int err = 0;
2300
2301 index = pos >> PAGE_CACHE_SHIFT;
2302 offset = pos & ~PAGE_CACHE_MASK;
2303
2304 while (index > (curidx = (curpos = *bytes)>>PAGE_CACHE_SHIFT)) {
2305 zerofrom = curpos & ~PAGE_CACHE_MASK;
2306 if (zerofrom & (blocksize-1)) {
2307 *bytes |= (blocksize-1);
2308 (*bytes)++;
2309 }
2310 len = PAGE_CACHE_SIZE - zerofrom;
2311
2312 err = pagecache_write_begin(file, mapping, curpos, len,
2313 AOP_FLAG_UNINTERRUPTIBLE,
2314 &page, &fsdata);
2315 if (err)
2316 goto out;
2317 zero_user(page, zerofrom, len);
2318 err = pagecache_write_end(file, mapping, curpos, len, len,
2319 page, fsdata);
2320 if (err < 0)
2321 goto out;
2322 BUG_ON(err != len);
2323 err = 0;
2324
2325 balance_dirty_pages_ratelimited(mapping);
2326 }
2327
2328
2329 if (index == curidx) {
2330 zerofrom = curpos & ~PAGE_CACHE_MASK;
2331
2332 if (offset <= zerofrom) {
2333 goto out;
2334 }
2335 if (zerofrom & (blocksize-1)) {
2336 *bytes |= (blocksize-1);
2337 (*bytes)++;
2338 }
2339 len = offset - zerofrom;
2340
2341 err = pagecache_write_begin(file, mapping, curpos, len,
2342 AOP_FLAG_UNINTERRUPTIBLE,
2343 &page, &fsdata);
2344 if (err)
2345 goto out;
2346 zero_user(page, zerofrom, len);
2347 err = pagecache_write_end(file, mapping, curpos, len, len,
2348 page, fsdata);
2349 if (err < 0)
2350 goto out;
2351 BUG_ON(err != len);
2352 err = 0;
2353 }
2354out:
2355 return err;
2356}

/*
 * For filesystems which do not support holes in files: the gap between the
 * old end-of-file and the start of this write is zeroed out first (via
 * cont_expand_zero() above), possibly extending the file.
 */
2362int cont_write_begin(struct file *file, struct address_space *mapping,
2363 loff_t pos, unsigned len, unsigned flags,
2364 struct page **pagep, void **fsdata,
2365 get_block_t *get_block, loff_t *bytes)
2366{
2367 struct inode *inode = mapping->host;
2368 unsigned blocksize = 1 << inode->i_blkbits;
2369 unsigned zerofrom;
2370 int err;
2371
2372 err = cont_expand_zero(file, mapping, pos, bytes);
2373 if (err)
2374 return err;
2375
2376 zerofrom = *bytes & ~PAGE_CACHE_MASK;
2377 if (pos+len > *bytes && zerofrom & (blocksize-1)) {
2378 *bytes |= (blocksize-1);
2379 (*bytes)++;
2380 }
2381
2382 return block_write_begin(mapping, pos, len, flags, pagep, get_block);
2383}
2384EXPORT_SYMBOL(cont_write_begin);
2385
2386int block_commit_write(struct page *page, unsigned from, unsigned to)
2387{
2388 struct inode *inode = page->mapping->host;
2389 __block_commit_write(inode,page,from,to);
2390 return 0;
2391}
2392EXPORT_SYMBOL(block_commit_write);

/*
 * block_page_mkwrite() is not allowed to change the file size as it gets
 * called from a page fault handler when a page is first dirtied.  Hence we
 * must be careful to check for EOF conditions here.  We set the page up
 * correctly for a written page, which means we get ENOSPC checking when
 * writing into holes and correct delalloc and unwritten extent mapping on
 * filesystems that support these features.
 *
 * We are not allowed to take the i_mutex here so we have to play games to
 * protect against truncate races as the page could now be beyond EOF.
 * Because truncate writes the inode size before removing pages, once we have
 * the page lock we can determine safely if the page is beyond EOF.  If it is
 * not beyond EOF, then the page is guaranteed safe against truncation until
 * we unlock the page; on success the page is returned still locked and dirty.
 *
 * Direct callers of this function should protect against filesystem freezing
 * using the sb_start_pagefault() - sb_end_pagefault() functions.
 */
2412int __block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
2413 get_block_t get_block)
2414{
2415 struct page *page = vmf->page;
2416 struct inode *inode = file_inode(vma->vm_file);
2417 unsigned long end;
2418 loff_t size;
2419 int ret;
2420
2421 lock_page(page);
2422 size = i_size_read(inode);
2423 if ((page->mapping != inode->i_mapping) ||
2424 (page_offset(page) > size)) {
2425
2426 ret = -EFAULT;
2427 goto out_unlock;
2428 }
2429
2430
2431 if (((page->index + 1) << PAGE_CACHE_SHIFT) > size)
2432 end = size & ~PAGE_CACHE_MASK;
2433 else
2434 end = PAGE_CACHE_SIZE;
2435
2436 ret = __block_write_begin(page, 0, end, get_block);
2437 if (!ret)
2438 ret = block_commit_write(page, 0, end);
2439
2440 if (unlikely(ret < 0))
2441 goto out_unlock;
2442 set_page_dirty(page);
2443 wait_for_stable_page(page);
2444 return 0;
2445out_unlock:
2446 unlock_page(page);
2447 return ret;
2448}
2449EXPORT_SYMBOL(__block_page_mkwrite);
2450
2451int block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
2452 get_block_t get_block)
2453{
2454 int ret;
2455 struct super_block *sb = file_inode(vma->vm_file)->i_sb;
2456
2457 sb_start_pagefault(sb);
2458
2459
2460
2461
2462
2463 file_update_time(vma->vm_file);
2464
2465 ret = __block_page_mkwrite(vma, vmf, get_block);
2466 sb_end_pagefault(sb);
2467 return block_page_mkwrite_return(ret);
2468}
2469EXPORT_SYMBOL(block_page_mkwrite);
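
/*
 * Illustrative sketch, not part of the original file: wiring
 * block_page_mkwrite() into a file's vm_operations_struct so that write
 * faults on a shared mapping allocate blocks and dirty the page.  The
 * "examplefs_" names are hypothetical and the block is compiled out.
 */
#if 0
static int examplefs_get_block(struct inode *inode, sector_t iblock,
			       struct buffer_head *bh_result, int create);

static int examplefs_page_mkwrite(struct vm_area_struct *vma,
				  struct vm_fault *vmf)
{
	return block_page_mkwrite(vma, vmf, examplefs_get_block);
}

static const struct vm_operations_struct examplefs_file_vm_ops = {
	.fault		= filemap_fault,
	.page_mkwrite	= examplefs_page_mkwrite,
};
#endif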
2470
2471
2472
2473
2474
2475
2476static void end_buffer_read_nobh(struct buffer_head *bh, int uptodate)
2477{
2478 __end_buffer_read_notouch(bh, uptodate);
2479}
2480
2481
2482
2483
2484
2485
2486static void attach_nobh_buffers(struct page *page, struct buffer_head *head)
2487{
2488 struct buffer_head *bh;
2489
2490 BUG_ON(!PageLocked(page));
2491
2492 spin_lock(&page->mapping->private_lock);
2493 bh = head;
2494 do {
2495 if (PageDirty(page))
2496 set_buffer_dirty(bh);
2497 if (!bh->b_this_page)
2498 bh->b_this_page = head;
2499 bh = bh->b_this_page;
2500 } while (bh != head);
2501 attach_page_buffers(page, head);
2502 spin_unlock(&page->mapping->private_lock);
2503}
2504

/*
 * On entry, the page is fully not uptodate.
 * On exit the page is fully uptodate in the areas outside (from,to).
 * The filesystem needs to handle block truncation upon failure.
 */
2510int nobh_write_begin(struct address_space *mapping,
2511 loff_t pos, unsigned len, unsigned flags,
2512 struct page **pagep, void **fsdata,
2513 get_block_t *get_block)
2514{
2515 struct inode *inode = mapping->host;
2516 const unsigned blkbits = inode->i_blkbits;
2517 const unsigned blocksize = 1 << blkbits;
2518 struct buffer_head *head, *bh;
2519 struct page *page;
2520 pgoff_t index;
2521 unsigned from, to;
2522 unsigned block_in_page;
2523 unsigned block_start, block_end;
2524 sector_t block_in_file;
2525 int nr_reads = 0;
2526 int ret = 0;
2527 int is_mapped_to_disk = 1;
2528
2529 index = pos >> PAGE_CACHE_SHIFT;
2530 from = pos & (PAGE_CACHE_SIZE - 1);
2531 to = from + len;
2532
2533 page = grab_cache_page_write_begin(mapping, index, flags);
2534 if (!page)
2535 return -ENOMEM;
2536 *pagep = page;
2537 *fsdata = NULL;
2538
2539 if (page_has_buffers(page)) {
2540 ret = __block_write_begin(page, pos, len, get_block);
2541 if (unlikely(ret))
2542 goto out_release;
2543 return ret;
2544 }
2545
2546 if (PageMappedToDisk(page))
2547 return 0;
2548
2549
2550
2551
2552
2553
2554
2555
2556
2557
2558 head = alloc_page_buffers(page, blocksize, 0);
2559 if (!head) {
2560 ret = -ENOMEM;
2561 goto out_release;
2562 }
2563
2564 block_in_file = (sector_t)page->index << (PAGE_CACHE_SHIFT - blkbits);
2565
2566
2567
2568
2569
2570
2571 for (block_start = 0, block_in_page = 0, bh = head;
2572 block_start < PAGE_CACHE_SIZE;
2573 block_in_page++, block_start += blocksize, bh = bh->b_this_page) {
2574 int create;
2575
2576 block_end = block_start + blocksize;
2577 bh->b_state = 0;
2578 create = 1;
2579 if (block_start >= to)
2580 create = 0;
2581 ret = get_block(inode, block_in_file + block_in_page,
2582 bh, create);
2583 if (ret)
2584 goto failed;
2585 if (!buffer_mapped(bh))
2586 is_mapped_to_disk = 0;
2587 if (buffer_new(bh))
2588 unmap_underlying_metadata(bh->b_bdev, bh->b_blocknr);
2589 if (PageUptodate(page)) {
2590 set_buffer_uptodate(bh);
2591 continue;
2592 }
2593 if (buffer_new(bh) || !buffer_mapped(bh)) {
2594 zero_user_segments(page, block_start, from,
2595 to, block_end);
2596 continue;
2597 }
2598 if (buffer_uptodate(bh))
2599 continue;
2600 if (block_start < from || block_end > to) {
2601 lock_buffer(bh);
2602 bh->b_end_io = end_buffer_read_nobh;
2603 submit_bh(READ, bh);
2604 nr_reads++;
2605 }
2606 }
2607
2608 if (nr_reads) {
2609
2610
2611
2612
2613
2614 for (bh = head; bh; bh = bh->b_this_page) {
2615 wait_on_buffer(bh);
2616 if (!buffer_uptodate(bh))
2617 ret = -EIO;
2618 }
2619 if (ret)
2620 goto failed;
2621 }
2622
2623 if (is_mapped_to_disk)
2624 SetPageMappedToDisk(page);
2625
2626 *fsdata = head;
2627
2628 return 0;
2629
2630failed:
2631 BUG_ON(!ret);
2632
2633
2634
2635
2636
2637
2638
2639 attach_nobh_buffers(page, head);
2640 page_zero_new_buffers(page, from, to);
2641
2642out_release:
2643 unlock_page(page);
2644 page_cache_release(page);
2645 *pagep = NULL;
2646
2647 return ret;
2648}
2649EXPORT_SYMBOL(nobh_write_begin);
2650
2651int nobh_write_end(struct file *file, struct address_space *mapping,
2652 loff_t pos, unsigned len, unsigned copied,
2653 struct page *page, void *fsdata)
2654{
2655 struct inode *inode = page->mapping->host;
2656 struct buffer_head *head = fsdata;
2657 struct buffer_head *bh;
2658 BUG_ON(fsdata != NULL && page_has_buffers(page));
2659
2660 if (unlikely(copied < len) && head)
2661 attach_nobh_buffers(page, head);
2662 if (page_has_buffers(page))
2663 return generic_write_end(file, mapping, pos, len,
2664 copied, page, fsdata);
2665
2666 SetPageUptodate(page);
2667 set_page_dirty(page);
2668 if (pos+copied > inode->i_size) {
2669 i_size_write(inode, pos+copied);
2670 mark_inode_dirty(inode);
2671 }
2672
2673 unlock_page(page);
2674 page_cache_release(page);
2675
2676 while (head) {
2677 bh = head;
2678 head = head->b_this_page;
2679 free_buffer_head(bh);
2680 }
2681
2682 return copied;
2683}
2684EXPORT_SYMBOL(nobh_write_end);
2685
2686
2687
2688
2689
2690
2691int nobh_writepage(struct page *page, get_block_t *get_block,
2692 struct writeback_control *wbc)
2693{
2694 struct inode * const inode = page->mapping->host;
2695 loff_t i_size = i_size_read(inode);
2696 const pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT;
2697 unsigned offset;
2698 int ret;
2699
2700
2701 if (page->index < end_index)
2702 goto out;
2703
2704
2705 offset = i_size & (PAGE_CACHE_SIZE-1);
2706 if (page->index >= end_index+1 || !offset) {
2707
2708
2709
2710
2711
2712#if 0
2713
2714 if (page->mapping->a_ops->invalidatepage)
2715 page->mapping->a_ops->invalidatepage(page, offset);
2716#endif
2717 unlock_page(page);
2718 return 0;
2719 }
2720
2721
2722
2723
2724
2725
2726
2727
2728 zero_user_segment(page, offset, PAGE_CACHE_SIZE);
2729out:
2730 ret = mpage_writepage(page, get_block, wbc);
2731 if (ret == -EAGAIN)
2732 ret = __block_write_full_page(inode, page, get_block, wbc,
2733 end_buffer_async_write);
2734 return ret;
2735}
2736EXPORT_SYMBOL(nobh_writepage);
2737
2738int nobh_truncate_page(struct address_space *mapping,
2739 loff_t from, get_block_t *get_block)
2740{
2741 pgoff_t index = from >> PAGE_CACHE_SHIFT;
2742 unsigned offset = from & (PAGE_CACHE_SIZE-1);
2743 unsigned blocksize;
2744 sector_t iblock;
2745 unsigned length, pos;
2746 struct inode *inode = mapping->host;
2747 struct page *page;
2748 struct buffer_head map_bh;
2749 int err;
2750
2751 blocksize = 1 << inode->i_blkbits;
2752 length = offset & (blocksize - 1);
2753
2754
2755 if (!length)
2756 return 0;
2757
2758 length = blocksize - length;
2759 iblock = (sector_t)index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
2760
2761 page = grab_cache_page(mapping, index);
2762 err = -ENOMEM;
2763 if (!page)
2764 goto out;
2765
2766 if (page_has_buffers(page)) {
2767has_buffers:
2768 unlock_page(page);
2769 page_cache_release(page);
2770 return block_truncate_page(mapping, from, get_block);
2771 }
2772
2773
2774 pos = blocksize;
2775 while (offset >= pos) {
2776 iblock++;
2777 pos += blocksize;
2778 }
2779
2780 map_bh.b_size = blocksize;
2781 map_bh.b_state = 0;
2782 err = get_block(inode, iblock, &map_bh, 0);
2783 if (err)
2784 goto unlock;
2785
2786 if (!buffer_mapped(&map_bh))
2787 goto unlock;
2788
2789
2790 if (!PageUptodate(page)) {
2791 err = mapping->a_ops->readpage(NULL, page);
2792 if (err) {
2793 page_cache_release(page);
2794 goto out;
2795 }
2796 lock_page(page);
2797 if (!PageUptodate(page)) {
2798 err = -EIO;
2799 goto unlock;
2800 }
2801 if (page_has_buffers(page))
2802 goto has_buffers;
2803 }
2804 zero_user(page, offset, length);
2805 set_page_dirty(page);
2806 err = 0;
2807
2808unlock:
2809 unlock_page(page);
2810 page_cache_release(page);
2811out:
2812 return err;
2813}
2814EXPORT_SYMBOL(nobh_truncate_page);
2815
2816int block_truncate_page(struct address_space *mapping,
2817 loff_t from, get_block_t *get_block)
2818{
2819 pgoff_t index = from >> PAGE_CACHE_SHIFT;
2820 unsigned offset = from & (PAGE_CACHE_SIZE-1);
2821 unsigned blocksize;
2822 sector_t iblock;
2823 unsigned length, pos;
2824 struct inode *inode = mapping->host;
2825 struct page *page;
2826 struct buffer_head *bh;
2827 int err;
2828
2829 blocksize = 1 << inode->i_blkbits;
2830 length = offset & (blocksize - 1);
2831
2832
2833 if (!length)
2834 return 0;
2835
2836 length = blocksize - length;
2837 iblock = (sector_t)index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
2838
2839 page = grab_cache_page(mapping, index);
2840 err = -ENOMEM;
2841 if (!page)
2842 goto out;
2843
2844 if (!page_has_buffers(page))
2845 create_empty_buffers(page, blocksize, 0);
2846
2847
2848 bh = page_buffers(page);
2849 pos = blocksize;
2850 while (offset >= pos) {
2851 bh = bh->b_this_page;
2852 iblock++;
2853 pos += blocksize;
2854 }
2855
2856 err = 0;
2857 if (!buffer_mapped(bh)) {
2858 WARN_ON(bh->b_size != blocksize);
2859 err = get_block(inode, iblock, bh, 0);
2860 if (err)
2861 goto unlock;
2862
2863 if (!buffer_mapped(bh))
2864 goto unlock;
2865 }
2866
2867
2868 if (PageUptodate(page))
2869 set_buffer_uptodate(bh);
2870
2871 if (!buffer_uptodate(bh) && !buffer_delay(bh) && !buffer_unwritten(bh)) {
2872 err = -EIO;
2873 ll_rw_block(READ, 1, &bh);
2874 wait_on_buffer(bh);
2875
2876 if (!buffer_uptodate(bh))
2877 goto unlock;
2878 }
2879
2880 zero_user(page, offset, length);
2881 mark_buffer_dirty(bh);
2882 err = 0;
2883
2884unlock:
2885 unlock_page(page);
2886 page_cache_release(page);
2887out:
2888 return err;
2889}
2890EXPORT_SYMBOL(block_truncate_page);
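
/*
 * Illustrative sketch, not part of the original file: a filesystem that
 * shrinks a file typically calls block_truncate_page() from its truncate
 * path to zero the tail of the new last block.  The "examplefs_" names
 * are hypothetical and the block is compiled out.
 */
#if 0
static int examplefs_get_block(struct inode *inode, sector_t iblock,
			       struct buffer_head *bh_result, int create);

static int examplefs_zero_tail(struct inode *inode, loff_t newsize)
{
	return block_truncate_page(inode->i_mapping, newsize,
				   examplefs_get_block);
}
#endif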

/*
 * The generic ->writepage function for buffer-backed address_spaces.
 */
2895int block_write_full_page(struct page *page, get_block_t *get_block,
2896 struct writeback_control *wbc)
2897{
2898 struct inode * const inode = page->mapping->host;
2899 loff_t i_size = i_size_read(inode);
2900 const pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT;
2901 unsigned offset;
2902
2903
2904 if (page->index < end_index)
2905 return __block_write_full_page(inode, page, get_block, wbc,
2906 end_buffer_async_write);
2907
2908
2909 offset = i_size & (PAGE_CACHE_SIZE-1);
2910 if (page->index >= end_index+1 || !offset) {
2911
2912
2913
2914
2915
2916 do_invalidatepage(page, 0, PAGE_CACHE_SIZE);
2917 unlock_page(page);
2918 return 0;
2919 }
2920
2921
2922
2923
2924
2925
2926
2927
2928 zero_user_segment(page, offset, PAGE_CACHE_SIZE);
2929 return __block_write_full_page(inode, page, get_block, wbc,
2930 end_buffer_async_write);
2931}
2932EXPORT_SYMBOL(block_write_full_page);

/*
 * Generic ->bmap implementation: map a logical file block to its on-disk
 * block number via the filesystem's get_block callback, without reading any
 * data.  Returns 0 for a hole or an unmapped block.
 */
sector_t generic_block_bmap(struct address_space *mapping, sector_t block,
			    get_block_t *get_block)
{
	struct buffer_head tmp;
	struct inode *inode = mapping->host;

	tmp.b_state = 0;
	tmp.b_blocknr = 0;
	tmp.b_size = 1 << inode->i_blkbits;
	get_block(inode, block, &tmp, 0);
	return tmp.b_blocknr;
}
EXPORT_SYMBOL(generic_block_bmap);
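
/*
 * Illustrative sketch, not part of the original file: wiring
 * block_write_full_page() and generic_block_bmap() into a filesystem's
 * address_space_operations.  All names are hypothetical, and
 * "example_aops_get_block" again assumes a trivial 1:1 block mapping.
 */
static int example_aops_get_block(struct inode *inode, sector_t iblock,
				  struct buffer_head *bh_result, int create)
{
	map_bh(bh_result, inode->i_sb, iblock);	/* hypothetical mapping */
	return 0;
}

static int example_writepage(struct page *page, struct writeback_control *wbc)
{
	return block_write_full_page(page, example_aops_get_block, wbc);
}

static sector_t example_bmap(struct address_space *mapping, sector_t block)
{
	return generic_block_bmap(mapping, block, example_aops_get_block);
}

static const struct address_space_operations example_aops __maybe_unused = {
	.writepage	= example_writepage,
	.bmap		= example_bmap,
	/*
	 * A real filesystem would also supply ->readpage, ->write_begin,
	 * ->write_end, etc.
	 */
};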

static void end_bio_bh_io_sync(struct bio *bio, int err)
{
	struct buffer_head *bh = bio->bi_private;

	if (err == -EOPNOTSUPP) {
		set_bit(BIO_EOPNOTSUPP, &bio->bi_flags);
	}

	if (unlikely(test_bit(BIO_QUIET, &bio->bi_flags)))
		set_bit(BH_Quiet, &bh->b_state);

	bh->b_end_io(bh, test_bit(BIO_UPTODATE, &bio->bi_flags));
	bio_put(bio);
}

/*
 * This allows us to do I/O even on the odd last sectors of a device, even if
 * the block size is some multiple of the physical sector size: we truncate
 * the bio to the size of the device and clear the end of the buffer head
 * manually.  Truly out-of-range accesses are left alone and become real I/O
 * errors; this only handles the "we need to be able to do I/O at the final
 * sector" case.
 */
static void guard_bh_eod(int rw, struct bio *bio, struct buffer_head *bh)
{
	sector_t maxsector;
	unsigned bytes;

	maxsector = i_size_read(bio->bi_bdev->bd_inode) >> 9;
	if (!maxsector)
		return;

	/*
	 * If the *whole* I/O is beyond the end of the device there is
	 * nothing we can usefully truncate; leave the bio alone and let
	 * the lower layers fail it.
	 */
	if (unlikely(bio->bi_iter.bi_sector >= maxsector))
		return;

	maxsector -= bio->bi_iter.bi_sector;
	bytes = bio->bi_iter.bi_size;
	if (likely((bytes >> 9) <= maxsector))
		return;

	/* We've got a bh that straddles the device size */
	bytes = maxsector << 9;

	/* Truncate the bio.. */
	bio->bi_iter.bi_size = bytes;
	bio->bi_io_vec[0].bv_len = bytes;

	/* ..and clear the end of the buffer for reads */
	if ((rw & RW_MASK) == READ) {
		void *kaddr = kmap_atomic(bh->b_page);
		memset(kaddr + bh_offset(bh) + bytes, 0, bh->b_size - bytes);
		kunmap_atomic(kaddr);
		flush_dcache_page(bh->b_page);
	}
}

int _submit_bh(int rw, struct buffer_head *bh, unsigned long bio_flags)
{
	struct bio *bio;
	int ret = 0;

	BUG_ON(!buffer_locked(bh));
	BUG_ON(!buffer_mapped(bh));
	BUG_ON(!bh->b_end_io);
	BUG_ON(buffer_delay(bh));
	BUG_ON(buffer_unwritten(bh));

	/*
	 * Only clear out a write error when rewriting
	 */
	if (test_set_buffer_req(bh) && (rw & WRITE))
		clear_buffer_write_io_error(bh);

	/*
	 * from here on down, it's all bio -- do the initial mapping,
	 * submit_bio -> generic_make_request may further map this bio around
	 */
	bio = bio_alloc(GFP_NOIO, 1);

	bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9);
	bio->bi_bdev = bh->b_bdev;
	bio->bi_io_vec[0].bv_page = bh->b_page;
	bio->bi_io_vec[0].bv_len = bh->b_size;
	bio->bi_io_vec[0].bv_offset = bh_offset(bh);

	bio->bi_vcnt = 1;
	bio->bi_iter.bi_size = bh->b_size;

	bio->bi_end_io = end_bio_bh_io_sync;
	bio->bi_private = bh;
	bio->bi_flags |= bio_flags;

	/* Take care of bh's that straddle the end of the device */
	guard_bh_eod(rw, bio, bh);

	if (buffer_meta(bh))
		rw |= REQ_META;
	if (buffer_prio(bh))
		rw |= REQ_PRIO;

	bio_get(bio);
	submit_bio(rw, bio);

	if (bio_flagged(bio, BIO_EOPNOTSUPP))
		ret = -EOPNOTSUPP;

	bio_put(bio);
	return ret;
}
EXPORT_SYMBOL_GPL(_submit_bh);

int submit_bh(int rw, struct buffer_head *bh)
{
	return _submit_bh(rw, bh, 0);
}
EXPORT_SYMBOL(submit_bh);
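
/*
 * Illustrative sketch, not part of the original file: the canonical pattern
 * for reading one buffer synchronously with submit_bh().  This is essentially
 * what bh_submit_read()/__bread() do; the helper name is hypothetical.
 */
static int __maybe_unused example_read_bh_sync(struct buffer_head *bh)
{
	if (buffer_uptodate(bh))
		return 0;

	lock_buffer(bh);
	if (buffer_uptodate(bh)) {	/* raced with another reader */
		unlock_buffer(bh);
		return 0;
	}

	get_bh(bh);			/* dropped by end_buffer_read_sync() */
	bh->b_end_io = end_buffer_read_sync;
	submit_bh(READ, bh);
	wait_on_buffer(bh);

	return buffer_uptodate(bh) ? 0 : -EIO;
}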

/**
 * ll_rw_block: low-level access to block devices (DEPRECATED)
 * @rw: whether to %READ or %WRITE or maybe %READA (readahead)
 * @nr: number of &struct buffer_heads in the array
 * @bhs: array of pointers to &struct buffer_head
 *
 * ll_rw_block() takes an array of pointers to &struct buffer_heads and
 * requests an I/O operation on them, either a %READ or a %WRITE.
 *
 * This function skips any buffer that it cannot lock, any buffer that
 * appears to be clean when doing a write request, and any buffer that
 * appears to be up-to-date when doing a read request.  Buffers that are
 * submitted for writing are marked clean (the buffer cache won't assume
 * that they are actually clean until the buffer gets unlocked).
 *
 * ll_rw_block() sets b_end_io to a simple completion handler that marks
 * the buffer up-to-date (if appropriate), unlocks the buffer and wakes
 * up any waiters.
 *
 * All of the buffers must be for the same device, and must also be a
 * multiple of the current approved size for the device.
 */
void ll_rw_block(int rw, int nr, struct buffer_head *bhs[])
{
	int i;

	for (i = 0; i < nr; i++) {
		struct buffer_head *bh = bhs[i];

		if (!trylock_buffer(bh))
			continue;
		if (rw == WRITE) {
			if (test_clear_buffer_dirty(bh)) {
				bh->b_end_io = end_buffer_write_sync;
				get_bh(bh);
				submit_bh(WRITE, bh);
				continue;
			}
		} else {
			if (!buffer_uptodate(bh)) {
				bh->b_end_io = end_buffer_read_sync;
				get_bh(bh);
				submit_bh(rw, bh);
				continue;
			}
		}
		unlock_buffer(bh);
	}
}
EXPORT_SYMBOL(ll_rw_block);
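
/*
 * Illustrative sketch, not part of the original file: using ll_rw_block() to
 * kick off reads for a batch of metadata buffers and then waiting only for
 * the one we actually need.  The helper name and the batch are hypothetical.
 */
static int __maybe_unused example_read_batch(struct buffer_head *bhs[], int nr)
{
	/* Start I/O on every buffer that is not already up to date. */
	ll_rw_block(READ, nr, bhs);

	/* Wait for the first buffer; the rest complete in the background. */
	wait_on_buffer(bhs[0]);
	return buffer_uptodate(bhs[0]) ? 0 : -EIO;
}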

void write_dirty_buffer(struct buffer_head *bh, int rw)
{
	lock_buffer(bh);
	if (!test_clear_buffer_dirty(bh)) {
		unlock_buffer(bh);
		return;
	}
	bh->b_end_io = end_buffer_write_sync;
	get_bh(bh);
	submit_bh(rw, bh);
}
EXPORT_SYMBOL(write_dirty_buffer);

/*
 * For a data-integrity writeout, we need to wait upon any in-progress I/O
 * and then start new I/O and then wait upon it.  The caller must hold a
 * reference on the buffer_head.
 */
int __sync_dirty_buffer(struct buffer_head *bh, int rw)
{
	int ret = 0;

	WARN_ON(atomic_read(&bh->b_count) < 1);
	lock_buffer(bh);
	if (test_clear_buffer_dirty(bh)) {
		get_bh(bh);
		bh->b_end_io = end_buffer_write_sync;
		ret = submit_bh(rw, bh);
		wait_on_buffer(bh);
		if (!ret && !buffer_uptodate(bh))
			ret = -EIO;
	} else {
		unlock_buffer(bh);
	}
	return ret;
}
EXPORT_SYMBOL(__sync_dirty_buffer);

int sync_dirty_buffer(struct buffer_head *bh)
{
	return __sync_dirty_buffer(bh, WRITE_SYNC);
}
EXPORT_SYMBOL(sync_dirty_buffer);
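
/*
 * Illustrative sketch, not part of the original file: the usual
 * read-modify-write cycle for an on-disk metadata block, ending with
 * sync_dirty_buffer() for a data-integrity write.  The helper name, the
 * block number and the byte being patched are hypothetical.
 */
static int __maybe_unused example_update_block(struct super_block *sb,
					       sector_t block)
{
	struct buffer_head *bh;
	int err;

	bh = sb_bread(sb, block);	/* read (or find) the block */
	if (!bh)
		return -EIO;

	bh->b_data[0] = 0;		/* hypothetical modification */
	mark_buffer_dirty(bh);		/* mark it for writeback */

	err = sync_dirty_buffer(bh);	/* write it out and wait */
	brelse(bh);
	return err;
}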

/*
 * try_to_free_buffers() checks whether all the buffers on this particular
 * page are unused, and releases them if so.
 *
 * Exclusion against try_to_free_buffers may be obtained by either locking
 * the page or by holding its mapping's private_lock.
 *
 * If the page is dirty but all the buffers are clean then we need to be
 * sure to mark the page clean as well.  This is because the page may be
 * against a block device, and a later reattachment of buffers to a dirty
 * page will set *all* buffers dirty.  So a subsequent write of everything
 * into the buffers would be wrong.
 *
 * try_to_free_buffers() is non-blocking.
 */
static inline int buffer_busy(struct buffer_head *bh)
{
	return atomic_read(&bh->b_count) |
		(bh->b_state & ((1 << BH_Dirty) | (1 << BH_Lock)));
}

static int
drop_buffers(struct page *page, struct buffer_head **buffers_to_free)
{
	struct buffer_head *head = page_buffers(page);
	struct buffer_head *bh;

	bh = head;
	do {
		if (buffer_write_io_error(bh) && page->mapping)
			set_bit(AS_EIO, &page->mapping->flags);
		if (buffer_busy(bh))
			goto failed;
		bh = bh->b_this_page;
	} while (bh != head);

	do {
		struct buffer_head *next = bh->b_this_page;

		if (bh->b_assoc_map)
			__remove_assoc_queue(bh);
		bh = next;
	} while (bh != head);
	*buffers_to_free = head;
	__clear_page_buffers(page);
	return 1;
failed:
	return 0;
}

int try_to_free_buffers(struct page *page)
{
	struct address_space * const mapping = page->mapping;
	struct buffer_head *buffers_to_free = NULL;
	int ret = 0;

	BUG_ON(!PageLocked(page));
	if (PageWriteback(page))
		return 0;

	if (mapping == NULL) {
		ret = drop_buffers(page, &buffers_to_free);
		goto out;
	}

	spin_lock(&mapping->private_lock);
	ret = drop_buffers(page, &buffers_to_free);

	/*
	 * If the filesystem writes its buffers by hand (eg ext3) then we can
	 * have clean buffers against a dirty page.  We clean the page here;
	 * otherwise the VM will never notice that the filesystem did any I/O
	 * at all.
	 *
	 * Also, during truncate, discard_buffer will have marked all the
	 * page's buffers clean.  We discover that here and clean the page
	 * also.
	 *
	 * private_lock must be held over this entire operation in order to
	 * synchronise against __set_page_dirty_buffers and prevent the dirty
	 * bit from being lost.
	 */
	if (ret)
		cancel_dirty_page(page, PAGE_CACHE_SIZE);
	spin_unlock(&mapping->private_lock);
out:
	if (buffers_to_free) {
		struct buffer_head *bh = buffers_to_free;

		do {
			struct buffer_head *next = bh->b_this_page;
			free_buffer_head(bh);
			bh = next;
		} while (bh != buffers_to_free);
	}
	return ret;
}
EXPORT_SYMBOL(try_to_free_buffers);
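
/*
 * Illustrative sketch, not part of the original file: a minimal
 * ->releasepage implementation.  A filesystem with no private page state
 * beyond the buffers themselves can simply defer to try_to_free_buffers();
 * the function name is hypothetical.
 */
static int __maybe_unused example_releasepage(struct page *page, gfp_t gfp)
{
	/*
	 * Returns 1 (and frees the buffer_heads) only if every buffer on the
	 * page is clean, unlocked and unreferenced.
	 */
	return try_to_free_buffers(page);
}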

/*
 * There are no bdflush tunables left.  But distributions are still running
 * obsolete flush daemons, so we terminate them here.
 *
 * Use of bdflush() is deprecated and will be removed in a future kernel.
 * The per-bdi flusher threads fully replace bdflush daemons and this call.
 */
SYSCALL_DEFINE2(bdflush, int, func, long, data)
{
	static int msg_count;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	if (msg_count < 5) {
		msg_count++;
		printk(KERN_INFO
			"warning: process `%s' used the obsolete bdflush"
			" system call\n", current->comm);
		printk(KERN_INFO "Fix your initscripts?\n");
	}

	if (func == 1)
		do_exit(0);
	return 0;
}

/*
 * Buffer-head allocation
 */
static struct kmem_cache *bh_cachep __read_mostly;

/*
 * Once the number of bh's in the machine exceeds this level, we start
 * stripping them in writeback.
 */
static unsigned long max_buffer_heads;

int buffer_heads_over_limit;

struct bh_accounting {
	int nr;			/* Number of live bh's */
	int ratelimit;		/* Limit cacheline bouncing */
};

static DEFINE_PER_CPU(struct bh_accounting, bh_accounting) = {0, 0};

static void recalc_bh_state(void)
{
	int i;
	int tot = 0;

	if (__this_cpu_inc_return(bh_accounting.ratelimit) - 1 < 4096)
		return;
	__this_cpu_write(bh_accounting.ratelimit, 0);
	for_each_online_cpu(i)
		tot += per_cpu(bh_accounting, i).nr;
	buffer_heads_over_limit = (tot > max_buffer_heads);
}

struct buffer_head *alloc_buffer_head(gfp_t gfp_flags)
{
	struct buffer_head *ret = kmem_cache_zalloc(bh_cachep, gfp_flags);
	if (ret) {
		INIT_LIST_HEAD(&ret->b_assoc_buffers);
		preempt_disable();
		__this_cpu_inc(bh_accounting.nr);
		recalc_bh_state();
		preempt_enable();
	}
	return ret;
}
EXPORT_SYMBOL(alloc_buffer_head);

void free_buffer_head(struct buffer_head *bh)
{
	BUG_ON(!list_empty(&bh->b_assoc_buffers));
	kmem_cache_free(bh_cachep, bh);
	preempt_disable();
	__this_cpu_dec(bh_accounting.nr);
	recalc_bh_state();
	preempt_enable();
}
EXPORT_SYMBOL(free_buffer_head);

static void buffer_exit_cpu(int cpu)
{
	int i;
	struct bh_lru *b = &per_cpu(bh_lrus, cpu);

	for (i = 0; i < BH_LRU_SIZE; i++) {
		brelse(b->bhs[i]);
		b->bhs[i] = NULL;
	}
	this_cpu_add(bh_accounting.nr, per_cpu(bh_accounting, cpu).nr);
	per_cpu(bh_accounting, cpu).nr = 0;
}

static int buffer_cpu_notify(struct notifier_block *self,
			      unsigned long action, void *hcpu)
{
	if (action == CPU_DEAD || action == CPU_DEAD_FROZEN)
		buffer_exit_cpu((unsigned long)hcpu);
	return NOTIFY_OK;
}

/**
 * bh_uptodate_or_lock - Test whether the buffer is uptodate
 * @bh: struct buffer_head
 *
 * Return true if the buffer is up-to-date and false, with the buffer
 * locked, if not.
 */
int bh_uptodate_or_lock(struct buffer_head *bh)
{
	if (!buffer_uptodate(bh)) {
		lock_buffer(bh);
		if (!buffer_uptodate(bh))
			return 0;
		unlock_buffer(bh);
	}
	return 1;
}
EXPORT_SYMBOL(bh_uptodate_or_lock);

/**
 * bh_submit_read - Submit a locked buffer for reading
 * @bh: struct buffer_head
 *
 * Returns zero on success and -EIO on error.
 */
int bh_submit_read(struct buffer_head *bh)
{
	BUG_ON(!buffer_locked(bh));

	if (buffer_uptodate(bh)) {
		unlock_buffer(bh);
		return 0;
	}

	get_bh(bh);
	bh->b_end_io = end_buffer_read_sync;
	submit_bh(READ, bh);
	wait_on_buffer(bh);
	if (buffer_uptodate(bh))
		return 0;
	return -EIO;
}
EXPORT_SYMBOL(bh_submit_read);
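
/*
 * Illustrative sketch, not part of the original file: the idiomatic pairing
 * of bh_uptodate_or_lock() and bh_submit_read(), for "read this buffer
 * unless it is already up to date" without racing other readers.  The
 * helper name is hypothetical.
 */
static int __maybe_unused example_read_if_needed(struct buffer_head *bh)
{
	/* Fast path: already up to date, nothing to do. */
	if (bh_uptodate_or_lock(bh))
		return 0;

	/* We hold the buffer lock here; bh_submit_read() consumes it. */
	return bh_submit_read(bh);
}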

void __init buffer_init(void)
{
	unsigned long nrpages;

	bh_cachep = kmem_cache_create("buffer_head",
			sizeof(struct buffer_head), 0,
				(SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|
				SLAB_MEM_SPREAD),
				NULL);

	/*
	 * Limit the bh occupancy to 10% of ZONE_NORMAL
	 */
	nrpages = (nr_free_buffer_pages() * 10) / 100;
	max_buffer_heads = nrpages * (PAGE_SIZE / sizeof(struct buffer_head));
	hotcpu_notifier(buffer_cpu_notify, 0);
}