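/*
 * fs/buffer.c
 *
 * Generic buffer-head handling: attaching buffer_heads to page-cache pages,
 * block device buffer lookup, and the buffer-based read/write/writeback
 * helpers used by block filesystems.
 */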
#include <linux/kernel.h>
#include <linux/syscalls.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/percpu.h>
#include <linux/slab.h>
#include <linux/capability.h>
#include <linux/blkdev.h>
#include <linux/file.h>
#include <linux/quotaops.h>
#include <linux/highmem.h>
#include <linux/export.h>
#include <linux/writeback.h>
#include <linux/hash.h>
#include <linux/suspend.h>
#include <linux/buffer_head.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/bio.h>
#include <linux/notifier.h>
#include <linux/cpu.h>
#include <linux/bitops.h>
#include <linux/mpage.h>
#include <linux/bit_spinlock.h>
#include <trace/events/block.h>

static int fsync_buffers_list(spinlock_t *lock, struct list_head *list);

#define BH_ENTRY(list) list_entry((list), struct buffer_head, b_assoc_buffers)

void init_buffer(struct buffer_head *bh, bh_end_io_t *handler, void *private)
{
	bh->b_end_io = handler;
	bh->b_private = private;
}
EXPORT_SYMBOL(init_buffer);

inline void touch_buffer(struct buffer_head *bh)
{
	trace_block_touch_buffer(bh);
	mark_page_accessed(bh->b_page);
}
EXPORT_SYMBOL(touch_buffer);

static int sleep_on_buffer(void *word)
{
	io_schedule();
	return 0;
}

void __lock_buffer(struct buffer_head *bh)
{
	wait_on_bit_lock(&bh->b_state, BH_Lock, sleep_on_buffer,
							TASK_UNINTERRUPTIBLE);
}
EXPORT_SYMBOL(__lock_buffer);

void unlock_buffer(struct buffer_head *bh)
{
	clear_bit_unlock(BH_Lock, &bh->b_state);
	smp_mb__after_clear_bit();
	wake_up_bit(&bh->b_state, BH_Lock);
}
EXPORT_SYMBOL(unlock_buffer);
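
/*
 * Usage sketch (illustrative only): the buffer lock protects the buffer
 * contents while they are brought up to date or written back.  A caller
 * that needs exclusive access typically does:
 *
 *	lock_buffer(bh);
 *	... examine or modify bh->b_data ...
 *	unlock_buffer(bh);
 */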

/*
 * Returns if the page has dirty or writeback buffers. If all the buffers
 * are unlocked and clean then the PageDirty information is stale. If
 * any of the pages are locked, it is assumed they are locked for IO.
 */
void buffer_check_dirty_writeback(struct page *page,
				     bool *dirty, bool *writeback)
{
	struct buffer_head *head, *bh;
	*dirty = false;
	*writeback = false;

	BUG_ON(!PageLocked(page));

	if (!page_has_buffers(page))
		return;

	if (PageWriteback(page))
		*writeback = true;

	head = page_buffers(page);
	bh = head;
	do {
		if (buffer_locked(bh))
			*writeback = true;

		if (buffer_dirty(bh))
			*dirty = true;

		bh = bh->b_this_page;
	} while (bh != head);
}
EXPORT_SYMBOL(buffer_check_dirty_writeback);

/*
 * Block until a buffer comes unlocked.  This doesn't stop it
 * from becoming locked again - you have to lock it yourself
 * if you want to preserve its state.
 */
void __wait_on_buffer(struct buffer_head *bh)
{
	wait_on_bit(&bh->b_state, BH_Lock, sleep_on_buffer, TASK_UNINTERRUPTIBLE);
}
EXPORT_SYMBOL(__wait_on_buffer);

static void
__clear_page_buffers(struct page *page)
{
	ClearPagePrivate(page);
	set_page_private(page, 0);
	page_cache_release(page);
}

static int quiet_error(struct buffer_head *bh)
{
	if (!test_bit(BH_Quiet, &bh->b_state) && printk_ratelimit())
		return 0;
	return 1;
}

static void buffer_io_error(struct buffer_head *bh)
{
	char b[BDEVNAME_SIZE];
	printk(KERN_ERR "Buffer I/O error on device %s, logical block %Lu\n",
			bdevname(bh->b_bdev, b),
			(unsigned long long)bh->b_blocknr);
}

/*
 * End-of-IO handler helper function which does not touch the bh after
 * unlocking it.
 * Note: unlock_buffer() sort-of does touch the bh after unlocking it, but
 * a race there is benign: unlock_buffer() only uses the bh's address for
 * hashing after unlocking the buffer, so it doesn't actually touch the bh
 * itself.
 */
static void __end_buffer_read_notouch(struct buffer_head *bh, int uptodate)
{
	if (uptodate) {
		set_buffer_uptodate(bh);
	} else {
		/* This happens, due to failed read-ahead attempts. */
		clear_buffer_uptodate(bh);
	}
	unlock_buffer(bh);
}

/*
 * Default synchronous end-of-IO handler: mark the buffer up-to-date and
 * unlock it.
 */
void end_buffer_read_sync(struct buffer_head *bh, int uptodate)
{
	__end_buffer_read_notouch(bh, uptodate);
	put_bh(bh);
}
EXPORT_SYMBOL(end_buffer_read_sync);

void end_buffer_write_sync(struct buffer_head *bh, int uptodate)
{
	char b[BDEVNAME_SIZE];

	if (uptodate) {
		set_buffer_uptodate(bh);
	} else {
		if (!quiet_error(bh)) {
			buffer_io_error(bh);
			printk(KERN_WARNING "lost page write due to "
					"I/O error on %s\n",
				       bdevname(bh->b_bdev, b));
		}
		set_buffer_write_io_error(bh);
		clear_buffer_uptodate(bh);
	}
	unlock_buffer(bh);
	put_bh(bh);
}
EXPORT_SYMBOL(end_buffer_write_sync);
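
/*
 * Usage sketch (illustrative only): end_buffer_write_sync() is the
 * completion handler for a plain synchronous write of one buffer:
 *
 *	lock_buffer(bh);
 *	get_bh(bh);
 *	bh->b_end_io = end_buffer_write_sync;
 *	submit_bh(WRITE, bh);
 *	wait_on_buffer(bh);
 *	if (!buffer_uptodate(bh))
 *		err = -EIO;
 *
 * This is essentially the pattern sync_dirty_buffer() follows.
 */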

/*
 * Various filesystems appear to want __find_get_block to be non-blocking.
 * But it's the page lock which protects the buffers.  To get around this,
 * we get exclusion from try_to_free_buffers with the blockdev mapping's
 * private_lock.
 */
static struct buffer_head *
__find_get_block_slow(struct block_device *bdev, sector_t block)
{
	struct inode *bd_inode = bdev->bd_inode;
	struct address_space *bd_mapping = bd_inode->i_mapping;
	struct buffer_head *ret = NULL;
	pgoff_t index;
	struct buffer_head *bh;
	struct buffer_head *head;
	struct page *page;
	int all_mapped = 1;

	index = block >> (PAGE_CACHE_SHIFT - bd_inode->i_blkbits);
	page = find_get_page(bd_mapping, index);
	if (!page)
		goto out;

	spin_lock(&bd_mapping->private_lock);
	if (!page_has_buffers(page))
		goto out_unlock;
	head = page_buffers(page);
	bh = head;
	do {
		if (!buffer_mapped(bh))
			all_mapped = 0;
		else if (bh->b_blocknr == block) {
			ret = bh;
			get_bh(bh);
			goto out_unlock;
		}
		bh = bh->b_this_page;
	} while (bh != head);

	/*
	 * We may be here because some of the buffers on this page are
	 * not mapped.  This is a valid state caused by races between
	 * file I/O on the block device and getblk(); only complain when
	 * all the buffers were mapped yet the block was still not found.
	 */
	if (all_mapped) {
		char b[BDEVNAME_SIZE];

		printk("__find_get_block_slow() failed. "
			"block=%llu, b_blocknr=%llu\n",
			(unsigned long long)block,
			(unsigned long long)bh->b_blocknr);
		printk("b_state=0x%08lx, b_size=%zu\n",
			bh->b_state, bh->b_size);
		printk("device %s blocksize: %d\n", bdevname(bdev, b),
			1 << bd_inode->i_blkbits);
	}
out_unlock:
	spin_unlock(&bd_mapping->private_lock);
	page_cache_release(page);
out:
	return ret;
}

/*
 * Kick the writeback threads then try to free up some ZONE_NORMAL memory.
 */
static void free_more_memory(void)
{
	struct zone *zone;
	int nid;

	wakeup_flusher_threads(1024, WB_REASON_FREE_MORE_MEM);
	yield();

	for_each_online_node(nid) {
		(void)first_zones_zonelist(node_zonelist(nid, GFP_NOFS),
						gfp_zone(GFP_NOFS), NULL,
						&zone);
		if (zone)
			try_to_free_pages(node_zonelist(nid, GFP_NOFS), 0,
						GFP_NOFS, NULL);
	}
}

/*
 * I/O completion handler for block_read_full_page() - pages
 * which come unlocked at the end of I/O.
 */
static void end_buffer_async_read(struct buffer_head *bh, int uptodate)
{
	unsigned long flags;
	struct buffer_head *first;
	struct buffer_head *tmp;
	struct page *page;
	int page_uptodate = 1;

	BUG_ON(!buffer_async_read(bh));

	page = bh->b_page;
	if (uptodate) {
		set_buffer_uptodate(bh);
	} else {
		clear_buffer_uptodate(bh);
		if (!quiet_error(bh))
			buffer_io_error(bh);
		SetPageError(page);
	}

	/*
	 * Be _very_ careful from here on. Bad things can happen if
	 * two buffer heads end IO at almost the same time and both
	 * decide that the page is now completely done.
	 */
	first = page_buffers(page);
	local_irq_save(flags);
	bit_spin_lock(BH_Uptodate_Lock, &first->b_state);
	clear_buffer_async_read(bh);
	unlock_buffer(bh);
	tmp = bh;
	do {
		if (!buffer_uptodate(tmp))
			page_uptodate = 0;
		if (buffer_async_read(tmp)) {
			BUG_ON(!buffer_locked(tmp));
			goto still_busy;
		}
		tmp = tmp->b_this_page;
	} while (tmp != bh);
	bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
	local_irq_restore(flags);

	/*
	 * If none of the buffers had errors and they are all
	 * uptodate then we can set the page uptodate.
	 */
	if (page_uptodate && !PageError(page))
		SetPageUptodate(page);
	unlock_page(page);
	return;

still_busy:
	bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
	local_irq_restore(flags);
	return;
}

/*
 * Completion handler for block_write_full_page() - pages which are unlocked
 * during I/O, and which have PageWriteback cleared upon I/O completion.
 */
void end_buffer_async_write(struct buffer_head *bh, int uptodate)
{
	char b[BDEVNAME_SIZE];
	unsigned long flags;
	struct buffer_head *first;
	struct buffer_head *tmp;
	struct page *page;

	BUG_ON(!buffer_async_write(bh));

	page = bh->b_page;
	if (uptodate) {
		set_buffer_uptodate(bh);
	} else {
		if (!quiet_error(bh)) {
			buffer_io_error(bh);
			printk(KERN_WARNING "lost page write due to "
					"I/O error on %s\n",
			       bdevname(bh->b_bdev, b));
		}
		set_bit(AS_EIO, &page->mapping->flags);
		set_buffer_write_io_error(bh);
		clear_buffer_uptodate(bh);
		SetPageError(page);
	}

	first = page_buffers(page);
	local_irq_save(flags);
	bit_spin_lock(BH_Uptodate_Lock, &first->b_state);

	clear_buffer_async_write(bh);
	unlock_buffer(bh);
	tmp = bh->b_this_page;
	while (tmp != bh) {
		if (buffer_async_write(tmp)) {
			BUG_ON(!buffer_locked(tmp));
			goto still_busy;
		}
		tmp = tmp->b_this_page;
	}
	bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
	local_irq_restore(flags);
	end_page_writeback(page);
	return;

still_busy:
	bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
	local_irq_restore(flags);
	return;
}
EXPORT_SYMBOL(end_buffer_async_write);

/*
 * If a page's buffers are under async readin (end_buffer_async_read
 * completion) then there is a possibility that another thread of
 * control could lock one of the buffers after it has completed
 * but while some of the other buffers have not completed.  This
 * locked buffer would confuse end_buffer_async_read() into not unlocking
 * the page.  So the absence of BH_Async_Read tells end_buffer_async_read()
 * that this buffer is not under async I/O.
 *
 * The page comes unlocked when it has no locked buffer_async buffers
 * left.
 *
 * PageLocked prevents anyone starting new async I/O reads any of
 * the buffers.
 *
 * PageWriteback is used to prevent simultaneous writeout of the same
 * page.
 *
 * PageLocked prevents anyone from starting writeback of a page which is
 * under read I/O (PageWriteback is only ever set against a locked page).
 */
static void mark_buffer_async_read(struct buffer_head *bh)
{
	bh->b_end_io = end_buffer_async_read;
	set_buffer_async_read(bh);
}

static void mark_buffer_async_write_endio(struct buffer_head *bh,
					  bh_end_io_t *handler)
{
	bh->b_end_io = handler;
	set_buffer_async_write(bh);
}

void mark_buffer_async_write(struct buffer_head *bh)
{
	mark_buffer_async_write_endio(bh, end_buffer_async_write);
}
EXPORT_SYMBOL(mark_buffer_async_write);

/*
 * fs/buffer.c contains helper functions for buffer-backed address space's
 * fsync functions.  A common requirement for buffer-based filesystems is
 * that certain data from the backing blockdev needs to be written out for
 * a successful fsync().  For example, ext2 indirect blocks need to be
 * written back and waited upon before fsync() returns.
 *
 * Such buffers are linked via bh->b_assoc_buffers onto the owning inode's
 * mapping->private_list, and bh->b_assoc_map points back at that mapping.
 * The list is protected by the *backing* blockdev mapping's private_lock;
 * mapping->private_data points at that blockdev mapping so the lock can
 * be found again at sync time.  try_to_free_buffers() takes the same lock,
 * which keeps list membership and buffer freeing coherent.
 */

/*
 * The buffer's backing address_space's private_lock must be held.
 */
static void __remove_assoc_queue(struct buffer_head *bh)
{
	list_del_init(&bh->b_assoc_buffers);
	WARN_ON(!bh->b_assoc_map);
	if (buffer_write_io_error(bh))
		set_bit(AS_EIO, &bh->b_assoc_map->flags);
	bh->b_assoc_map = NULL;
}

int inode_has_buffers(struct inode *inode)
{
	return !list_empty(&inode->i_data.private_list);
}

/*
 * osync is designed to support O_SYNC io.  It waits synchronously for
 * all already-submitted IO to complete, but does not queue any new
 * writes to the disk.
 *
 * To do O_SYNC writes, just queue the buffer writes with ll_rw_block as
 * you dirty the buffers, and then use osync_buffers_list to wait for
 * completion.  Any other dirty buffers which are not yet queued for
 * write will not be flushed to disk by the osync.
 */
static int osync_buffers_list(spinlock_t *lock, struct list_head *list)
{
	struct buffer_head *bh;
	struct list_head *p;
	int err = 0;

	spin_lock(lock);
repeat:
	list_for_each_prev(p, list) {
		bh = BH_ENTRY(p);
		if (buffer_locked(bh)) {
			get_bh(bh);
			spin_unlock(lock);
			wait_on_buffer(bh);
			if (!buffer_uptodate(bh))
				err = -EIO;
			brelse(bh);
			spin_lock(lock);
			goto repeat;
		}
	}
	spin_unlock(lock);
	return err;
}

static void do_thaw_one(struct super_block *sb, void *unused)
{
	char b[BDEVNAME_SIZE];
	while (sb->s_bdev && !thaw_bdev(sb->s_bdev, sb))
		printk(KERN_WARNING "Emergency Thaw on %s\n",
		       bdevname(sb->s_bdev, b));
}

static void do_thaw_all(struct work_struct *work)
{
	iterate_supers(do_thaw_one, NULL);
	kfree(work);
	printk(KERN_WARNING "Emergency Thaw complete\n");
}

/**
 * emergency_thaw_all -- forcibly thaw every frozen filesystem
 *
 * Used for emergency unfreeze of all filesystems via SysRq.
 */
void emergency_thaw_all(void)
{
	struct work_struct *work;

	work = kmalloc(sizeof(*work), GFP_ATOMIC);
	if (work) {
		INIT_WORK(work, do_thaw_all);
		schedule_work(work);
	}
}

/**
 * sync_mapping_buffers - write out & wait upon a mapping's "associated" buffers
 * @mapping: the mapping which wants those buffers written
 *
 * Starts I/O against the buffers at mapping->private_list, and waits upon
 * that I/O.
 *
 * Basically, this is a convenience function for fsync().
 * @mapping is a file or directory which needs those buffers to be written for
 * a successful fsync().
 */
int sync_mapping_buffers(struct address_space *mapping)
{
	struct address_space *buffer_mapping = mapping->private_data;

	if (buffer_mapping == NULL || list_empty(&mapping->private_list))
		return 0;

	return fsync_buffers_list(&buffer_mapping->private_lock,
					&mapping->private_list);
}
EXPORT_SYMBOL(sync_mapping_buffers);

/*
 * Called when we've recently written block `bblock', and it is known that
 * `bblock' was for a buffer_boundary() buffer.  This means that the block at
 * `bblock + 1' is probably a dirty indirect block.  Hunt it down and, if it's
 * dirty, schedule it for IO.  So that indirects merge nicely with their data.
 */
void write_boundary_block(struct block_device *bdev,
			sector_t bblock, unsigned blocksize)
{
	struct buffer_head *bh = __find_get_block(bdev, bblock + 1, blocksize);
	if (bh) {
		if (buffer_dirty(bh))
			ll_rw_block(WRITE, 1, &bh);
		put_bh(bh);
	}
}

void mark_buffer_dirty_inode(struct buffer_head *bh, struct inode *inode)
{
	struct address_space *mapping = inode->i_mapping;
	struct address_space *buffer_mapping = bh->b_page->mapping;

	mark_buffer_dirty(bh);
	if (!mapping->private_data) {
		mapping->private_data = buffer_mapping;
	} else {
		BUG_ON(mapping->private_data != buffer_mapping);
	}
	if (!bh->b_assoc_map) {
		spin_lock(&buffer_mapping->private_lock);
		list_move_tail(&bh->b_assoc_buffers,
				&mapping->private_list);
		bh->b_assoc_map = mapping;
		spin_unlock(&buffer_mapping->private_lock);
	}
}
EXPORT_SYMBOL(mark_buffer_dirty_inode);
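
/*
 * Usage sketch (illustrative only): a filesystem that dirties metadata
 * buffers on the backing blockdev can associate them with the owning
 * inode and later flush them from its ->fsync():
 *
 *	mark_buffer_dirty_inode(bh, inode);		(at modification time)
 *	...
 *	err = sync_mapping_buffers(inode->i_mapping);	(at fsync time)
 */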

/*
 * Mark the page dirty, and set it dirty in the radix tree, and mark the inode
 * dirty.
 *
 * If warn is true, then emit a warning if the page is not uptodate and has
 * not been truncated.
 */
static void __set_page_dirty(struct page *page,
		struct address_space *mapping, int warn)
{
	spin_lock_irq(&mapping->tree_lock);
	if (page->mapping) {	/* Race with truncate? */
		WARN_ON_ONCE(warn && !PageUptodate(page));
		account_page_dirtied(page, mapping);
		radix_tree_tag_set(&mapping->page_tree,
				page_index(page), PAGECACHE_TAG_DIRTY);
	}
	spin_unlock_irq(&mapping->tree_lock);
	__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
}

/*
 * Add a page to the dirty page list.
 *
 * It is a sad fact of life that this function is called from several places
 * deeply under spinlocking.  It may not sleep.
 *
 * If the page has buffers, the uptodate buffers are set dirty, to preserve
 * dirty-state coherency between the page and the buffers.  If the page does
 * not have buffers then when they are later attached they will all be set
 * dirty.
 *
 * The buffers are dirtied before the page is dirtied.  There's a small race
 * window in which a writepage caller may see the page cleanness but not the
 * buffer dirtiness.  That's fine.  If this code were to set the page dirty
 * before the buffers, a concurrent writepage caller could clear the page
 * dirty bit, see a bunch of clean buffers and we'd end up with dirty
 * buffers/clean page on the dirty page list.
 *
 * We use private_lock to lock against try_to_free_buffers while using the
 * page's buffer list.  Also use this to protect against clean buffers being
 * added to the page after it was set dirty.
 */
int __set_page_dirty_buffers(struct page *page)
{
	int newly_dirty;
	struct address_space *mapping = page_mapping(page);

	if (unlikely(!mapping))
		return !TestSetPageDirty(page);

	spin_lock(&mapping->private_lock);
	if (page_has_buffers(page)) {
		struct buffer_head *head = page_buffers(page);
		struct buffer_head *bh = head;

		do {
			set_buffer_dirty(bh);
			bh = bh->b_this_page;
		} while (bh != head);
	}
	newly_dirty = !TestSetPageDirty(page);
	spin_unlock(&mapping->private_lock);

	if (newly_dirty)
		__set_page_dirty(page, mapping, 1);
	return newly_dirty;
}
EXPORT_SYMBOL(__set_page_dirty_buffers);

/*
 * Write out and wait upon a list of buffers.
 *
 * We have conflicting pressures: we want to make sure that all
 * initially dirty buffers get waited on, but that any subsequently
 * dirtied buffers don't.  After all, we don't want fsync to last
 * forever if somebody is actively writing to the file.
 *
 * Do this in two main stages: first we copy dirty buffers to a
 * temporary inode list, queueing the writes as we go.  Then we clean
 * up, waiting for those writes to complete.
 *
 * During this second stage, any subsequent updates to the file may end
 * up refiling the buffer on the original inode's dirty list again, so
 * there is a chance we will end up with a buffer queued for write but
 * not yet completed on that list.  So, as a final cleanup we go through
 * the osync code to catch these locked, dirty buffers without requeuing
 * any newly dirty buffers for write.
 */
static int fsync_buffers_list(spinlock_t *lock, struct list_head *list)
{
	struct buffer_head *bh;
	struct list_head tmp;
	struct address_space *mapping;
	int err = 0, err2;
	struct blk_plug plug;

	INIT_LIST_HEAD(&tmp);
	blk_start_plug(&plug);

	spin_lock(lock);
	while (!list_empty(list)) {
		bh = BH_ENTRY(list->next);
		mapping = bh->b_assoc_map;
		__remove_assoc_queue(bh);

		/* Avoid race with mark_buffer_dirty_inode() which does
		 * a lockless check and we rely on seeing the dirty bit */
		smp_mb();
		if (buffer_dirty(bh) || buffer_locked(bh)) {
			list_add(&bh->b_assoc_buffers, &tmp);
			bh->b_assoc_map = mapping;
			if (buffer_dirty(bh)) {
				get_bh(bh);
				spin_unlock(lock);
				/*
				 * Ensure any pending I/O completes so that
				 * write_dirty_buffer() actually writes the
				 * current contents - it is a noop if I/O is
				 * still in flight on potentially older
				 * contents.
				 */
				write_dirty_buffer(bh, WRITE_SYNC);

				/*
				 * Kick off IO for the previous mapping. Note
				 * that we will not run the very last mapping,
				 * wait_on_buffer() will do that for us
				 * through sync_buffer().
				 */
				brelse(bh);
				spin_lock(lock);
			}
		}
	}

	spin_unlock(lock);
	blk_finish_plug(&plug);
	spin_lock(lock);

	while (!list_empty(&tmp)) {
		bh = BH_ENTRY(tmp.prev);
		get_bh(bh);
		mapping = bh->b_assoc_map;
		__remove_assoc_queue(bh);

		/* Avoid race with mark_buffer_dirty_inode() which does
		 * a lockless check and we rely on seeing the dirty bit */
		smp_mb();
		if (buffer_dirty(bh)) {
			list_add(&bh->b_assoc_buffers,
				 &mapping->private_list);
			bh->b_assoc_map = mapping;
		}
		spin_unlock(lock);
		wait_on_buffer(bh);
		if (!buffer_uptodate(bh))
			err = -EIO;
		brelse(bh);
		spin_lock(lock);
	}

	spin_unlock(lock);
	err2 = osync_buffers_list(lock, list);
	if (err)
		return err;
	else
		return err2;
}

/*
 * Invalidate any and all dirty buffers on a given inode.  We are
 * probably unwilling to flush them out to disk (but are willing to
 * forget that they existed).
 */
void invalidate_inode_buffers(struct inode *inode)
{
	if (inode_has_buffers(inode)) {
		struct address_space *mapping = &inode->i_data;
		struct list_head *list = &mapping->private_list;
		struct address_space *buffer_mapping = mapping->private_data;

		spin_lock(&buffer_mapping->private_lock);
		while (!list_empty(list))
			__remove_assoc_queue(BH_ENTRY(list->next));
		spin_unlock(&buffer_mapping->private_lock);
	}
}
EXPORT_SYMBOL(invalidate_inode_buffers);

/*
 * Remove any clean buffers from the inode's buffer list.  This is called
 * when we're trying to free the inode itself.  Those buffers can pin it.
 *
 * Returns true if all buffers were removed.
 */
int remove_inode_buffers(struct inode *inode)
{
	int ret = 1;

	if (inode_has_buffers(inode)) {
		struct address_space *mapping = &inode->i_data;
		struct list_head *list = &mapping->private_list;
		struct address_space *buffer_mapping = mapping->private_data;

		spin_lock(&buffer_mapping->private_lock);
		while (!list_empty(list)) {
			struct buffer_head *bh = BH_ENTRY(list->next);
			if (buffer_dirty(bh)) {
				ret = 0;
				break;
			}
			__remove_assoc_queue(bh);
		}
		spin_unlock(&buffer_mapping->private_lock);
	}
	return ret;
}

/*
 * Create the appropriate buffers when given a page for data area and
 * the size of each buffer.  Use the bh->b_this_page linked list to
 * follow the buffers created.  Return NULL if unable to create more
 * buffers.
 *
 * The retry flag is used to differentiate async IO (paging, swapping)
 * which may not fail from ordinary buffer allocations.
 */
struct buffer_head *alloc_page_buffers(struct page *page, unsigned long size,
		int retry)
{
	struct buffer_head *bh, *head;
	long offset;

try_again:
	head = NULL;
	offset = PAGE_SIZE;
	while ((offset -= size) >= 0) {
		bh = alloc_buffer_head(GFP_NOFS);
		if (!bh)
			goto no_grow;

		bh->b_this_page = head;
		bh->b_blocknr = -1;
		head = bh;

		bh->b_size = size;

		/* Link the buffer to its page */
		set_bh_page(bh, page, offset);
	}
	return head;
/*
 * In case anything failed, we just free everything we got.
 */
no_grow:
	if (head) {
		do {
			bh = head;
			head = head->b_this_page;
			free_buffer_head(bh);
		} while (head);
	}

	/*
	 * Return failure for non-async IO requests.  Async IO requests
	 * are not allowed to fail, so we have to wait until buffer heads
	 * become available.  But we don't want tasks sleeping with
	 * partially complete buffers, so all were released above.
	 */
	if (!retry)
		return NULL;

	/* We're _really_ low on memory. Now we just
	 * wait for old buffer heads to become free due to
	 * finishing IO.  Since this is an async request and
	 * the reserve list is empty, we're sure there are
	 * async buffer heads in use.
	 */
	free_more_memory();
	goto try_again;
}
EXPORT_SYMBOL_GPL(alloc_page_buffers);

static inline void
link_dev_buffers(struct page *page, struct buffer_head *head)
{
	struct buffer_head *bh, *tail;

	bh = head;
	do {
		tail = bh;
		bh = bh->b_this_page;
	} while (bh);
	tail->b_this_page = head;
	attach_page_buffers(page, head);
}

static sector_t blkdev_max_block(struct block_device *bdev, unsigned int size)
{
	sector_t retval = ~((sector_t)0);
	loff_t sz = i_size_read(bdev->bd_inode);

	if (sz) {
		unsigned int sizebits = blksize_bits(size);
		retval = (sz >> sizebits);
	}
	return retval;
}

/*
 * Initialise the state of a blockdev page's buffers.
 */
static sector_t
init_page_buffers(struct page *page, struct block_device *bdev,
			sector_t block, int size)
{
	struct buffer_head *head = page_buffers(page);
	struct buffer_head *bh = head;
	int uptodate = PageUptodate(page);
	sector_t end_block = blkdev_max_block(I_BDEV(bdev->bd_inode), size);

	do {
		if (!buffer_mapped(bh)) {
			init_buffer(bh, NULL, NULL);
			bh->b_bdev = bdev;
			bh->b_blocknr = block;
			if (uptodate)
				set_buffer_uptodate(bh);
			if (block < end_block)
				set_buffer_mapped(bh);
		}
		block++;
		bh = bh->b_this_page;
	} while (bh != head);

	/*
	 * Caller needs to validate requested block against end of device.
	 */
	return end_block;
}

/*
 * Create the page-cache page that contains the requested block.
 *
 * This is used purely for blockdev mappings.
 */
static int
grow_dev_page(struct block_device *bdev, sector_t block,
		pgoff_t index, int size, int sizebits)
{
	struct inode *inode = bdev->bd_inode;
	struct page *page;
	struct buffer_head *bh;
	sector_t end_block;
	int ret = 0;		/* Will call free_more_memory() */

	page = find_or_create_page(inode->i_mapping, index,
		(mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS)|__GFP_MOVABLE);
	if (!page)
		return ret;

	BUG_ON(!PageLocked(page));

	if (page_has_buffers(page)) {
		bh = page_buffers(page);
		if (bh->b_size == size) {
			end_block = init_page_buffers(page, bdev,
						index << sizebits, size);
			goto done;
		}
		if (!try_to_free_buffers(page))
			goto failed;
	}

	/*
	 * Allocate some buffers for this page
	 */
	bh = alloc_page_buffers(page, size, 0);
	if (!bh)
		goto failed;

	/*
	 * Link the page to the buffers and initialise them.  Take the
	 * lock to be atomic wrt __find_get_block(), which does not
	 * run under the page lock.
	 */
	spin_lock(&inode->i_mapping->private_lock);
	link_dev_buffers(page, bh);
	end_block = init_page_buffers(page, bdev, index << sizebits, size);
	spin_unlock(&inode->i_mapping->private_lock);
done:
	ret = (block < end_block) ? 1 : -ENXIO;
failed:
	unlock_page(page);
	page_cache_release(page);
	return ret;
}

/*
 * Create buffers for the specified block device block's page.  If
 * that page was dirty, the buffers are set dirty also.
 */
static int
grow_buffers(struct block_device *bdev, sector_t block, int size)
{
	pgoff_t index;
	int sizebits;

	sizebits = -1;
	do {
		sizebits++;
	} while ((size << sizebits) < PAGE_SIZE);

	index = block >> sizebits;

	/*
	 * Check for a block which wants to lie outside our maximum possible
	 * pagecache index.  (this comparison is done using sector_t types).
	 */
	if (unlikely(index != block >> sizebits)) {
		char b[BDEVNAME_SIZE];

		printk(KERN_ERR "%s: requested out-of-range block %llu for "
			"device %s\n",
			__func__, (unsigned long long)block,
			bdevname(bdev, b));
		return -EIO;
	}

	/* Create a page with the proper size buffers.. */
	return grow_dev_page(bdev, block, index, size, sizebits);
}

static struct buffer_head *
__getblk_slow(struct block_device *bdev, sector_t block, int size)
{
	/* Size must be multiple of hard sectorsize */
	if (unlikely(size & (bdev_logical_block_size(bdev)-1) ||
			(size < 512 || size > PAGE_SIZE))) {
		printk(KERN_ERR "getblk(): invalid block size %d requested\n",
					size);
		printk(KERN_ERR "logical block size: %d\n",
					bdev_logical_block_size(bdev));

		dump_stack();
		return NULL;
	}

	for (;;) {
		struct buffer_head *bh;
		int ret;

		bh = __find_get_block(bdev, block, size);
		if (bh)
			return bh;

		ret = grow_buffers(bdev, block, size);
		if (ret < 0)
			return NULL;
		if (ret == 0)
			free_more_memory();
	}
}

/*
 * The relationship between dirty buffers and dirty pages:
 *
 * Whenever a page has any dirty buffers, the page's dirty bit is set, and
 * the page is tagged dirty in its radix tree.
 *
 * At all times, the dirtiness of the buffers represents the dirtiness of
 * subsections of the page.  If the page has buffers, the page dirty bit is
 * merely a hint about the true dirty state.
 *
 * When a page is set dirty in its entirety, all its buffers are marked dirty
 * (if the page has buffers).
 *
 * When a buffer is marked dirty, its page is dirtied, but the page's other
 * buffers are not.
 *
 * Also.  When blockdev buffers are explicitly read with bread(), they
 * individually become uptodate.  But their backing page remains not
 * uptodate - even if all of its buffers are uptodate.  A subsequent
 * block_read_full_page() against that page will discover all the uptodate
 * buffers, will set the page uptodate and will perform no I/O.
 */

/**
 * mark_buffer_dirty - mark a buffer_head as needing writeout
 * @bh: the buffer_head to mark dirty
 *
 * mark_buffer_dirty() will set the dirty bit against the buffer, then set its
 * backing page dirty, then tag the page as dirty in its address_space's radix
 * tree and then attach the address_space's inode to its superblock's dirty
 * inode list.
 *
 * mark_buffer_dirty() is atomic.  It takes bh->b_page->mapping->private_lock,
 * mapping->tree_lock and mapping->host->i_lock.
 */
void mark_buffer_dirty(struct buffer_head *bh)
{
	WARN_ON_ONCE(!buffer_uptodate(bh));

	trace_block_dirty_buffer(bh);

	/*
	 * Very *carefully* optimize the it-is-already-dirty path.
	 *
	 * Don't let the final "is it dirty" escape to before we
	 * perhaps modified the buffer.
	 */
	if (buffer_dirty(bh)) {
		smp_mb();
		if (buffer_dirty(bh))
			return;
	}

	if (!test_set_buffer_dirty(bh)) {
		struct page *page = bh->b_page;
		if (!TestSetPageDirty(page)) {
			struct address_space *mapping = page_mapping(page);
			if (mapping)
				__set_page_dirty(page, mapping, 0);
		}
	}
}
EXPORT_SYMBOL(mark_buffer_dirty);

/*
 * Decrement a buffer_head's reference count.  If all buffers against a page
 * have zero reference count, are clean and unlocked, and if the page is clean
 * and unlocked then try_to_free_buffers() may strip the buffers from the page
 * in preparation for freeing it (sometimes, rarely, buffers are removed from
 * a page but it ends up not being freed, and buffers may later be reattached).
 */
void __brelse(struct buffer_head *buf)
{
	if (atomic_read(&buf->b_count)) {
		put_bh(buf);
		return;
	}
	WARN(1, KERN_ERR "VFS: brelse: Trying to free free buffer\n");
}
EXPORT_SYMBOL(__brelse);

/*
 * bforget() is like brelse(), except it discards any
 * potentially dirty data.
 */
void __bforget(struct buffer_head *bh)
{
	clear_buffer_dirty(bh);
	if (bh->b_assoc_map) {
		struct address_space *buffer_mapping = bh->b_page->mapping;

		spin_lock(&buffer_mapping->private_lock);
		list_del_init(&bh->b_assoc_buffers);
		bh->b_assoc_map = NULL;
		spin_unlock(&buffer_mapping->private_lock);
	}
	__brelse(bh);
}
EXPORT_SYMBOL(__bforget);

static struct buffer_head *__bread_slow(struct buffer_head *bh)
{
	lock_buffer(bh);
	if (buffer_uptodate(bh)) {
		unlock_buffer(bh);
		return bh;
	} else {
		get_bh(bh);
		bh->b_end_io = end_buffer_read_sync;
		submit_bh(READ, bh);
		wait_on_buffer(bh);
		if (buffer_uptodate(bh))
			return bh;
	}
	brelse(bh);
	return NULL;
}

/*
 * Per-cpu buffer LRU implementation.  To reduce the cost of __find_get_block().
 * The bhs[] array is sorted - newest buffer is at bhs[0].  Buffers have their
 * refcount elevated by one when they're in an LRU.  A buffer can only appear
 * once in a particular CPU's LRU.  A single buffer can be present in multiple
 * CPU's LRUs at the same time.
 *
 * This is a transparent caching front-end to sb_bread(), sb_getblk() and
 * sb_find_get_block().
 *
 * The LRUs themselves only need locking against invalidate_bh_lrus.  We use
 * a local interrupt disable for that.
 */
#define BH_LRU_SIZE	8

struct bh_lru {
	struct buffer_head *bhs[BH_LRU_SIZE];
};

static DEFINE_PER_CPU(struct bh_lru, bh_lrus) = {{ NULL }};

#ifdef CONFIG_SMP
#define bh_lru_lock()	local_irq_disable()
#define bh_lru_unlock()	local_irq_enable()
#else
#define bh_lru_lock()	preempt_disable()
#define bh_lru_unlock()	preempt_enable()
#endif

static inline void check_irqs_on(void)
{
#ifdef irqs_disabled
	BUG_ON(irqs_disabled());
#endif
}

/*
 * The LRU management algorithm is dopey-but-simple.  Sorry.
 */
static void bh_lru_install(struct buffer_head *bh)
{
	struct buffer_head *evictee = NULL;

	check_irqs_on();
	bh_lru_lock();
	if (__this_cpu_read(bh_lrus.bhs[0]) != bh) {
		struct buffer_head *bhs[BH_LRU_SIZE];
		int in;
		int out = 0;

		get_bh(bh);
		bhs[out++] = bh;
		for (in = 0; in < BH_LRU_SIZE; in++) {
			struct buffer_head *bh2 =
				__this_cpu_read(bh_lrus.bhs[in]);

			if (bh2 == bh) {
				__brelse(bh2);
			} else {
				if (out >= BH_LRU_SIZE) {
					BUG_ON(evictee != NULL);
					evictee = bh2;
				} else {
					bhs[out++] = bh2;
				}
			}
		}
		while (out < BH_LRU_SIZE)
			bhs[out++] = NULL;
		memcpy(__this_cpu_ptr(&bh_lrus.bhs), bhs, sizeof(bhs));
	}
	bh_lru_unlock();

	if (evictee)
		__brelse(evictee);
}

/*
 * Look up the bh in this cpu's LRU.  If it's there, move it to the head.
 */
static struct buffer_head *
lookup_bh_lru(struct block_device *bdev, sector_t block, unsigned size)
{
	struct buffer_head *ret = NULL;
	unsigned int i;

	check_irqs_on();
	bh_lru_lock();
	for (i = 0; i < BH_LRU_SIZE; i++) {
		struct buffer_head *bh = __this_cpu_read(bh_lrus.bhs[i]);

		if (bh && bh->b_bdev == bdev &&
				bh->b_blocknr == block && bh->b_size == size) {
			if (i) {
				while (i) {
					__this_cpu_write(bh_lrus.bhs[i],
						__this_cpu_read(bh_lrus.bhs[i - 1]));
					i--;
				}
				__this_cpu_write(bh_lrus.bhs[0], bh);
			}
			get_bh(bh);
			ret = bh;
			break;
		}
	}
	bh_lru_unlock();
	return ret;
}

/*
 * Perform a pagecache lookup for the matching buffer.  If it's there, refresh
 * it in the LRU and mark it as accessed.  If it is not present then return
 * NULL.
 */
struct buffer_head *
__find_get_block(struct block_device *bdev, sector_t block, unsigned size)
{
	struct buffer_head *bh = lookup_bh_lru(bdev, block, size);

	if (bh == NULL) {
		bh = __find_get_block_slow(bdev, block);
		if (bh)
			bh_lru_install(bh);
	}
	if (bh)
		touch_buffer(bh);
	return bh;
}
EXPORT_SYMBOL(__find_get_block);

/*
 * __getblk will locate (and, if necessary, create) the buffer_head
 * which corresponds to the passed block_device, block and size. The
 * returned buffer has its reference count incremented.
 */
struct buffer_head *
__getblk(struct block_device *bdev, sector_t block, unsigned size)
{
	struct buffer_head *bh = __find_get_block(bdev, block, size);

	might_sleep();
	if (bh == NULL)
		bh = __getblk_slow(bdev, block, size);
	return bh;
}
EXPORT_SYMBOL(__getblk);
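
/*
 * Usage sketch (illustrative only): overwriting one block in the block
 * device's page cache and marking it for writeback:
 *
 *	bh = __getblk(bdev, block, size);
 *	lock_buffer(bh);
 *	memcpy(bh->b_data, data, size);
 *	set_buffer_uptodate(bh);
 *	unlock_buffer(bh);
 *	mark_buffer_dirty(bh);
 *	brelse(bh);
 */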

/*
 * Do async read-ahead on a buffer..
 */
void __breadahead(struct block_device *bdev, sector_t block, unsigned size)
{
	struct buffer_head *bh = __getblk(bdev, block, size);
	if (likely(bh)) {
		ll_rw_block(READA, 1, &bh);
		brelse(bh);
	}
}
EXPORT_SYMBOL(__breadahead);

/**
 *  __bread() - reads a specified block and returns the bh
 *  @bdev: the block_device to read from
 *  @block: number of block
 *  @size: size (in bytes) to read
 *
 *  Reads a specified block, and returns buffer head that contains it.
 *  It returns NULL if the block was unreadable.
 */
struct buffer_head *
__bread(struct block_device *bdev, sector_t block, unsigned size)
{
	struct buffer_head *bh = __getblk(bdev, block, size);

	if (likely(bh) && !buffer_uptodate(bh))
		bh = __bread_slow(bh);
	return bh;
}
EXPORT_SYMBOL(__bread);
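
/*
 * Usage sketch (illustrative only): synchronously reading block 0 of a
 * block device at its current blocksize, then dropping the reference:
 *
 *	struct buffer_head *bh = __bread(bdev, 0, bdev->bd_block_size);
 *	if (!bh)
 *		return -EIO;
 *	... inspect bh->b_data ...
 *	brelse(bh);
 */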

/*
 * invalidate_bh_lrus() is called rarely - but not only at unmount.
 * This doesn't race because it runs in each cpu either in irq
 * or with preempt disabled.
 */
static void invalidate_bh_lru(void *arg)
{
	struct bh_lru *b = &get_cpu_var(bh_lrus);
	int i;

	for (i = 0; i < BH_LRU_SIZE; i++) {
		brelse(b->bhs[i]);
		b->bhs[i] = NULL;
	}
	put_cpu_var(bh_lrus);
}

static bool has_bh_in_lru(int cpu, void *dummy)
{
	struct bh_lru *b = per_cpu_ptr(&bh_lrus, cpu);
	int i;

	for (i = 0; i < BH_LRU_SIZE; i++) {
		if (b->bhs[i])
			return true;
	}

	return false;
}

void invalidate_bh_lrus(void)
{
	on_each_cpu_cond(has_bh_in_lru, invalidate_bh_lru, NULL, 1, GFP_KERNEL);
}
EXPORT_SYMBOL_GPL(invalidate_bh_lrus);

void set_bh_page(struct buffer_head *bh,
		struct page *page, unsigned long offset)
{
	bh->b_page = page;
	BUG_ON(offset >= PAGE_SIZE);
	if (PageHighMem(page))
		/*
		 * This catches illegal uses and preserves the offset:
		 */
		bh->b_data = (char *)(0 + offset);
	else
		bh->b_data = page_address(page) + offset;
}
EXPORT_SYMBOL(set_bh_page);

/*
 * Called when truncating a buffer on a page completely.
 */
static void discard_buffer(struct buffer_head *bh)
{
	lock_buffer(bh);
	clear_buffer_dirty(bh);
	bh->b_bdev = NULL;
	clear_buffer_mapped(bh);
	clear_buffer_req(bh);
	clear_buffer_new(bh);
	clear_buffer_delay(bh);
	clear_buffer_unwritten(bh);
	unlock_buffer(bh);
}

/**
 * block_invalidatepage - invalidate part or all of a buffer-backed page
 *
 * @page: the page which is affected
 * @offset: start of the range to invalidate
 * @length: length of the range to invalidate
 *
 * block_invalidatepage() is called when all or part of the page has become
 * invalidated by a truncate operation.
 *
 * block_invalidatepage() does not have to release all buffers, but it must
 * ensure that no dirty buffer is left outside @offset and that no I/O
 * is underway against any of the blocks which are outside the truncation
 * point.  Because the caller is about to free (and possibly reuse) those
 * blocks on-disk.
 */
void block_invalidatepage(struct page *page, unsigned int offset,
			  unsigned int length)
{
	struct buffer_head *head, *bh, *next;
	unsigned int curr_off = 0;
	unsigned int stop = length + offset;

	BUG_ON(!PageLocked(page));
	if (!page_has_buffers(page))
		goto out;

	/*
	 * Check for overflow
	 */
	BUG_ON(stop > PAGE_CACHE_SIZE || stop < length);

	head = page_buffers(page);
	bh = head;
	do {
		unsigned int next_off = curr_off + bh->b_size;
		next = bh->b_this_page;

		/*
		 * Are we still fully in range ?
		 */
		if (next_off > stop)
			goto out;

		/*
		 * is this block fully invalidated?
		 */
		if (offset <= curr_off)
			discard_buffer(bh);
		curr_off = next_off;
		bh = next;
	} while (bh != head);

	/*
	 * We release buffers only if the entire page is being invalidated.
	 * The get_block cached value has been unconditionally invalidated,
	 * so real IO is not possible anymore.
	 */
	if (offset == 0)
		try_to_release_page(page, 0);
out:
	return;
}
EXPORT_SYMBOL(block_invalidatepage);

/*
 * We attach and possibly dirty the buffers atomically wrt
 * __set_page_dirty_buffers() via private_lock.  try_to_free_buffers
 * is already excluded via the page lock.
 */
void create_empty_buffers(struct page *page,
			unsigned long blocksize, unsigned long b_state)
{
	struct buffer_head *bh, *head, *tail;

	head = alloc_page_buffers(page, blocksize, 1);
	bh = head;
	do {
		bh->b_state |= b_state;
		tail = bh;
		bh = bh->b_this_page;
	} while (bh);
	tail->b_this_page = head;

	spin_lock(&page->mapping->private_lock);
	if (PageUptodate(page) || PageDirty(page)) {
		bh = head;
		do {
			if (PageDirty(page))
				set_buffer_dirty(bh);
			if (PageUptodate(page))
				set_buffer_uptodate(bh);
			bh = bh->b_this_page;
		} while (bh != head);
	}
	attach_page_buffers(page, head);
	spin_unlock(&page->mapping->private_lock);
}
EXPORT_SYMBOL(create_empty_buffers);

/*
 * We are taking a block for data and we don't want any output from any
 * buffer-cache aliases starting from return from that function and
 * until the moment when something will explicitly mark the buffer
 * dirty (hopefully that will not happen until we will free that block ;-)
 * We don't even need to mark it not-uptodate - nobody can expect
 * anything from a newly allocated buffer anyway.
 *
 * Also..  Note that bforget() doesn't lock the buffer.  So there can
 * be writeout I/O going on against recently-freed buffers.  We don't
 * wait on that I/O in bforget() - it's more efficient to wait on the I/O
 * only if we really need to.  That happens here.
 */
void unmap_underlying_metadata(struct block_device *bdev, sector_t block)
{
	struct buffer_head *old_bh;

	might_sleep();

	old_bh = __find_get_block_slow(bdev, block);
	if (old_bh) {
		clear_buffer_dirty(old_bh);
		wait_on_buffer(old_bh);
		clear_buffer_req(old_bh);
		__brelse(old_bh);
	}
}
EXPORT_SYMBOL(unmap_underlying_metadata);

/*
 * Size is a power-of-two in the range 512..PAGE_SIZE,
 * and the case we care about most is PAGE_SIZE.
 *
 * So this *could* possibly be written with those
 * constraints in mind (relevant mostly if some
 * architecture has a slow bit-scan instruction)
 */
static inline int block_size_bits(unsigned int blocksize)
{
	return ilog2(blocksize);
}

static struct buffer_head *create_page_buffers(struct page *page, struct inode *inode, unsigned int b_state)
{
	BUG_ON(!PageLocked(page));

	if (!page_has_buffers(page))
		create_empty_buffers(page, 1 << ACCESS_ONCE(inode->i_blkbits), b_state);
	return page_buffers(page);
}

/*
 * NOTE! All mapped/uptodate combinations are valid:
 *
 *	Mapped	Uptodate	Meaning
 *
 *	No	No		"unknown" - must do get_block()
 *	No	Yes		"hole" - zero-filled
 *	Yes	No		"allocated" - allocated on disk, not read in
 *	Yes	Yes		"valid" - allocated and up-to-date in memory.
 *
 * "Dirty" is valid only with the last case (mapped+uptodate).
 */

/*
 * While block_write_full_page is writing back the dirty buffers under
 * the page lock, whoever dirtied the buffers may decide to clean them
 * again at any time.  We handle that by only looking at the buffer
 * state inside lock_buffer().
 *
 * If block_write_full_page() is called for regular writeback
 * (wbc->sync_mode == WB_SYNC_NONE) then it will redirty a page which has
 * locked buffer.  This only can happen if someone has written the buffer
 * directly, with submit_bh().  At the address_space level PageWriteback
 * prevents this contention from occurring.
 *
 * If block_write_full_page() is called with wbc->sync_mode ==
 * WB_SYNC_ALL, the writes are posted using WRITE_SYNC; this
 * causes the writes to be flagged as synchronous writes.
 */
static int __block_write_full_page(struct inode *inode, struct page *page,
			get_block_t *get_block, struct writeback_control *wbc,
			bh_end_io_t *handler)
{
	int err;
	sector_t block;
	sector_t last_block;
	struct buffer_head *bh, *head;
	unsigned int blocksize, bbits;
	int nr_underway = 0;
	int write_op = (wbc->sync_mode == WB_SYNC_ALL ?
			WRITE_SYNC : WRITE);

	head = create_page_buffers(page, inode,
					(1 << BH_Dirty)|(1 << BH_Uptodate));

	/*
	 * Be very careful.  We have no exclusion from __set_page_dirty_buffers
	 * here, and the (potentially unmapped) buffers may become dirty at
	 * any time.  If a buffer becomes dirty here after we've inspected it
	 * then we just miss that fact, and the page stays dirty.
	 *
	 * Buffers outside i_size may be dirtied by __set_page_dirty_buffers;
	 * handle that here by just cleaning them.
	 */
	bh = head;
	blocksize = bh->b_size;
	bbits = block_size_bits(blocksize);

	block = (sector_t)page->index << (PAGE_CACHE_SHIFT - bbits);
	last_block = (i_size_read(inode) - 1) >> bbits;

	/*
	 * Get all the dirty buffers mapped to disk addresses and
	 * handle any aliases from the underlying blockdev's mapping.
	 */
	do {
		if (block > last_block) {
			/*
			 * mapped buffers outside i_size will occur, because
			 * this page can be outside i_size when there is a
			 * truncate in progress.
			 */
			clear_buffer_dirty(bh);
			set_buffer_uptodate(bh);
		} else if ((!buffer_mapped(bh) || buffer_delay(bh)) &&
			   buffer_dirty(bh)) {
			WARN_ON(bh->b_size != blocksize);
			err = get_block(inode, block, bh, 1);
			if (err)
				goto recover;
			clear_buffer_delay(bh);
			if (buffer_new(bh)) {
				/* blockdev mappings never come here */
				clear_buffer_new(bh);
				unmap_underlying_metadata(bh->b_bdev,
							bh->b_blocknr);
			}
		}
		bh = bh->b_this_page;
		block++;
	} while (bh != head);

	do {
		if (!buffer_mapped(bh))
			continue;
		/*
		 * If it's a fully non-blocking write attempt and we cannot
		 * lock the buffer then redirty the page.  Note that this can
		 * potentially cause a busy-wait loop from writeback threads
		 * and kswapd activity, but those code paths have their own
		 * higher-level throttling.
		 */
		if (wbc->sync_mode != WB_SYNC_NONE) {
			lock_buffer(bh);
		} else if (!trylock_buffer(bh)) {
			redirty_page_for_writepage(wbc, page);
			continue;
		}
		if (test_clear_buffer_dirty(bh)) {
			mark_buffer_async_write_endio(bh, handler);
		} else {
			unlock_buffer(bh);
		}
	} while ((bh = bh->b_this_page) != head);

	/*
	 * The page and its buffers are protected by PageWriteback(), so we can
	 * drop the bh refcounts early.
	 */
	BUG_ON(PageWriteback(page));
	set_page_writeback(page);

	do {
		struct buffer_head *next = bh->b_this_page;
		if (buffer_async_write(bh)) {
			submit_bh(write_op, bh);
			nr_underway++;
		}
		bh = next;
	} while (bh != head);
	unlock_page(page);

	err = 0;
done:
	if (nr_underway == 0) {
		/*
		 * The page was marked dirty, but the buffers were
		 * clean.  Someone wrote them back by hand with
		 * ll_rw_block/submit_bh.  A rare case.
		 */
		end_page_writeback(page);

		/*
		 * The page and buffer_heads can be released at any time from
		 * here on.
		 */
	}
	return err;

recover:
	/*
	 * ENOSPC, or some other error.  We may already have added some
	 * blocks to the file, so we need to write these out to avoid
	 * exposing stale data.
	 * The page is currently locked and not marked for writeback
	 */
	bh = head;
	/* Recovery: lock and submit the mapped buffers */
	do {
		if (buffer_mapped(bh) && buffer_dirty(bh) &&
		    !buffer_delay(bh)) {
			lock_buffer(bh);
			mark_buffer_async_write_endio(bh, handler);
		} else {
			/*
			 * The buffer may have been set dirty during
			 * attachment to a dirty page.
			 */
			clear_buffer_dirty(bh);
		}
	} while ((bh = bh->b_this_page) != head);
	SetPageError(page);
	BUG_ON(PageWriteback(page));
	mapping_set_error(page->mapping, err);
	set_page_writeback(page);
	do {
		struct buffer_head *next = bh->b_this_page;
		if (buffer_async_write(bh)) {
			clear_buffer_dirty(bh);
			submit_bh(write_op, bh);
			nr_underway++;
		}
		bh = next;
	} while (bh != head);
	unlock_page(page);
	goto done;
}

/*
 * If a page has any new buffers, zero them out here, and mark them uptodate
 * and dirty so they'll be written out (in order to prevent uninitialised
 * block data from leaking). And clear the new bit.
 */
void page_zero_new_buffers(struct page *page, unsigned from, unsigned to)
{
	unsigned int block_start, block_end;
	struct buffer_head *head, *bh;

	BUG_ON(!PageLocked(page));
	if (!page_has_buffers(page))
		return;

	bh = head = page_buffers(page);
	block_start = 0;
	do {
		block_end = block_start + bh->b_size;

		if (buffer_new(bh)) {
			if (block_end > from && block_start < to) {
				if (!PageUptodate(page)) {
					unsigned start, size;

					start = max(from, block_start);
					size = min(to, block_end) - start;

					zero_user(page, start, size);
					set_buffer_uptodate(bh);
				}

				clear_buffer_new(bh);
				mark_buffer_dirty(bh);
			}
		}

		block_start = block_end;
		bh = bh->b_this_page;
	} while (bh != head);
}
EXPORT_SYMBOL(page_zero_new_buffers);

int __block_write_begin(struct page *page, loff_t pos, unsigned len,
		get_block_t *get_block)
{
	unsigned from = pos & (PAGE_CACHE_SIZE - 1);
	unsigned to = from + len;
	struct inode *inode = page->mapping->host;
	unsigned block_start, block_end;
	sector_t block;
	int err = 0;
	unsigned blocksize, bbits;
	struct buffer_head *bh, *head, *wait[2], **wait_bh = wait;

	BUG_ON(!PageLocked(page));
	BUG_ON(from > PAGE_CACHE_SIZE);
	BUG_ON(to > PAGE_CACHE_SIZE);
	BUG_ON(from > to);

	head = create_page_buffers(page, inode, 0);
	blocksize = head->b_size;
	bbits = block_size_bits(blocksize);

	block = (sector_t)page->index << (PAGE_CACHE_SHIFT - bbits);

	for (bh = head, block_start = 0; bh != head || !block_start;
	    block++, block_start = block_end, bh = bh->b_this_page) {
		block_end = block_start + blocksize;
		if (block_end <= from || block_start >= to) {
			if (PageUptodate(page)) {
				if (!buffer_uptodate(bh))
					set_buffer_uptodate(bh);
			}
			continue;
		}
		if (buffer_new(bh))
			clear_buffer_new(bh);
		if (!buffer_mapped(bh)) {
			WARN_ON(bh->b_size != blocksize);
			err = get_block(inode, block, bh, 1);
			if (err)
				break;
			if (buffer_new(bh)) {
				unmap_underlying_metadata(bh->b_bdev,
							bh->b_blocknr);
				if (PageUptodate(page)) {
					clear_buffer_new(bh);
					set_buffer_uptodate(bh);
					mark_buffer_dirty(bh);
					continue;
				}
				if (block_end > to || block_start < from)
					zero_user_segments(page,
						to, block_end,
						block_start, from);
				continue;
			}
		}
		if (PageUptodate(page)) {
			if (!buffer_uptodate(bh))
				set_buffer_uptodate(bh);
			continue;
		}
		if (!buffer_uptodate(bh) && !buffer_delay(bh) &&
		    !buffer_unwritten(bh) &&
		    (block_start < from || block_end > to)) {
			ll_rw_block(READ, 1, &bh);
			*wait_bh++ = bh;
		}
	}
	/*
	 * If we issued read requests - let them complete.
	 */
	while (wait_bh > wait) {
		wait_on_buffer(*--wait_bh);
		if (!buffer_uptodate(*wait_bh))
			err = -EIO;
	}
	if (unlikely(err))
		page_zero_new_buffers(page, from, to);
	return err;
}
EXPORT_SYMBOL(__block_write_begin);

static int __block_commit_write(struct inode *inode, struct page *page,
		unsigned from, unsigned to)
{
	unsigned block_start, block_end;
	int partial = 0;
	unsigned blocksize;
	struct buffer_head *bh, *head;

	bh = head = page_buffers(page);
	blocksize = bh->b_size;

	block_start = 0;
	do {
		block_end = block_start + blocksize;
		if (block_end <= from || block_start >= to) {
			if (!buffer_uptodate(bh))
				partial = 1;
		} else {
			set_buffer_uptodate(bh);
			mark_buffer_dirty(bh);
		}
		clear_buffer_new(bh);

		block_start = block_end;
		bh = bh->b_this_page;
	} while (bh != head);

	/*
	 * If this is a partial write which happened to make all buffers
	 * uptodate then we can optimize away a bogus readpage() for
	 * the next read(). Here we 'discover' whether the page went
	 * uptodate as a result of this (potentially partial) write.
	 */
	if (!partial)
		SetPageUptodate(page);
	return 0;
}

/*
 * block_write_begin takes care of the basic task of block allocation and
 * bringing partial write blocks uptodate first.
 *
 * The filesystem needs to handle block truncation upon failure.
 */
int block_write_begin(struct address_space *mapping, loff_t pos, unsigned len,
		unsigned flags, struct page **pagep, get_block_t *get_block)
{
	pgoff_t index = pos >> PAGE_CACHE_SHIFT;
	struct page *page;
	int status;

	page = grab_cache_page_write_begin(mapping, index, flags);
	if (!page)
		return -ENOMEM;

	status = __block_write_begin(page, pos, len, get_block);
	if (unlikely(status)) {
		unlock_page(page);
		page_cache_release(page);
		page = NULL;
	}

	*pagep = page;
	return status;
}
EXPORT_SYMBOL(block_write_begin);
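
/*
 * Usage sketch (illustrative only): a filesystem's ->write_begin can
 * usually just forward to block_write_begin() with its own get_block
 * routine ("myfs_get_block" here is hypothetical):
 *
 *	static int myfs_write_begin(struct file *file,
 *			struct address_space *mapping, loff_t pos,
 *			unsigned len, unsigned flags,
 *			struct page **pagep, void **fsdata)
 *	{
 *		return block_write_begin(mapping, pos, len, flags,
 *					 pagep, myfs_get_block);
 *	}
 */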

int block_write_end(struct file *file, struct address_space *mapping,
			loff_t pos, unsigned len, unsigned copied,
			struct page *page, void *fsdata)
{
	struct inode *inode = mapping->host;
	unsigned start;

	start = pos & (PAGE_CACHE_SIZE - 1);

	if (unlikely(copied < len)) {
		/*
		 * The buffers that were written will now be uptodate, so we
		 * don't have to worry about a readpage reading them and
		 * overwriting a partial write. However if we have encountered
		 * a short write and only partially written into a buffer, it
		 * will not be marked uptodate, so a readpage might come in and
		 * destroy our partial write.
		 *
		 * Do the simplest thing, and just treat any short write to a
		 * non uptodate page as a zero-length write, and force the
		 * caller to redo the whole thing.
		 */
		if (!PageUptodate(page))
			copied = 0;

		page_zero_new_buffers(page, start+copied, start+len);
	}
	flush_dcache_page(page);

	/* This could be a short (even 0-length) commit */
	__block_commit_write(inode, page, start, start+copied);

	return copied;
}
EXPORT_SYMBOL(block_write_end);

int generic_write_end(struct file *file, struct address_space *mapping,
			loff_t pos, unsigned len, unsigned copied,
			struct page *page, void *fsdata)
{
	struct inode *inode = mapping->host;
	int i_size_changed = 0;

	copied = block_write_end(file, mapping, pos, len, copied, page, fsdata);

	/*
	 * No need to use i_size_read() here, the i_size
	 * cannot change under us because we hold i_mutex.
	 *
	 * But it's important to update i_size while still holding page lock:
	 * page writeout could otherwise come in and zero beyond i_size.
	 */
	if (pos+copied > inode->i_size) {
		i_size_write(inode, pos+copied);
		i_size_changed = 1;
	}

	unlock_page(page);
	page_cache_release(page);

	/*
	 * Don't mark the inode dirty under page lock. First, it unnecessarily
	 * makes the holding time of page lock longer. Second, it forces lock
	 * ordering of page lock and transaction start for journaling
	 * filesystems.
	 */
	if (i_size_changed)
		mark_inode_dirty(inode);

	return copied;
}
EXPORT_SYMBOL(generic_write_end);

/*
 * block_is_partially_uptodate checks whether buffers within a page are
 * uptodate or not.
 *
 * Returns true if all buffers which correspond to the file portion
 * we want to read are uptodate.
 */
int block_is_partially_uptodate(struct page *page, read_descriptor_t *desc,
					unsigned long from)
{
	unsigned block_start, block_end, blocksize;
	unsigned to;
	struct buffer_head *bh, *head;
	int ret = 1;

	if (!page_has_buffers(page))
		return 0;

	head = page_buffers(page);
	blocksize = head->b_size;
	to = min_t(unsigned, PAGE_CACHE_SIZE - from, desc->count);
	to = from + to;
	if (from < blocksize && to > PAGE_CACHE_SIZE - blocksize)
		return 0;

	bh = head;
	block_start = 0;
	do {
		block_end = block_start + blocksize;
		if (block_end > from && block_start < to) {
			if (!buffer_uptodate(bh)) {
				ret = 0;
				break;
			}
			if (block_end >= to)
				break;
		}
		block_start = block_end;
		bh = bh->b_this_page;
	} while (bh != head);

	return ret;
}
EXPORT_SYMBOL(block_is_partially_uptodate);

/*
 * Generic "read page" function for block filesystems.
 * Reads the page asynchronously --- the unlock_buffer() and
 * set/clear_buffer_uptodate() functions propagate buffer state into the
 * page struct once IO has completed.
 */
int block_read_full_page(struct page *page, get_block_t *get_block)
{
	struct inode *inode = page->mapping->host;
	sector_t iblock, lblock;
	struct buffer_head *bh, *head, *arr[MAX_BUF_PER_PAGE];
	unsigned int blocksize, bbits;
	int nr, i;
	int fully_mapped = 1;

	head = create_page_buffers(page, inode, 0);
	blocksize = head->b_size;
	bbits = block_size_bits(blocksize);

	iblock = (sector_t)page->index << (PAGE_CACHE_SHIFT - bbits);
	lblock = (i_size_read(inode)+blocksize-1) >> bbits;
	bh = head;
	nr = 0;
	i = 0;

	do {
		if (buffer_uptodate(bh))
			continue;

		if (!buffer_mapped(bh)) {
			int err = 0;

			fully_mapped = 0;
			if (iblock < lblock) {
				WARN_ON(bh->b_size != blocksize);
				err = get_block(inode, iblock, bh, 0);
				if (err)
					SetPageError(page);
			}
			if (!buffer_mapped(bh)) {
				zero_user(page, i * blocksize, blocksize);
				if (!err)
					set_buffer_uptodate(bh);
				continue;
			}
			/*
			 * get_block() might have updated the buffer
			 * synchronously
			 */
			if (buffer_uptodate(bh))
				continue;
		}
		arr[nr++] = bh;
	} while (i++, iblock++, (bh = bh->b_this_page) != head);

	if (fully_mapped)
		SetPageMappedToDisk(page);

	if (!nr) {
		/*
		 * All buffers are uptodate - we can set the page uptodate
		 * as well. But not if get_block() returned an error.
		 */
		if (!PageError(page))
			SetPageUptodate(page);
		unlock_page(page);
		return 0;
	}

	/* Stage two: lock the buffers */
	for (i = 0; i < nr; i++) {
		bh = arr[i];
		lock_buffer(bh);
		mark_buffer_async_read(bh);
	}

	/*
	 * Stage 3: start the IO.  Check for uptodateness
	 * inside the buffer lock in case another process reading
	 * the underlying blockdev brought it uptodate (the sct fix).
	 */
	for (i = 0; i < nr; i++) {
		bh = arr[i];
		if (buffer_uptodate(bh))
			end_buffer_async_read(bh, 1);
		else
			submit_bh(READ, bh);
	}
	return 0;
}
EXPORT_SYMBOL(block_read_full_page);
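
/*
 * Usage sketch (illustrative only): wiring a filesystem's ->readpage to
 * this helper ("myfs_get_block" is hypothetical):
 *
 *	static int myfs_readpage(struct file *file, struct page *page)
 *	{
 *		return block_read_full_page(page, myfs_get_block);
 *	}
 */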

/*
 * utility function for filesystems that need to do work on expanding
 * truncates.  Uses filesystem pagecache writes to allow the filesystem to
 * deal with the hole.
 */
int generic_cont_expand_simple(struct inode *inode, loff_t size)
{
	struct address_space *mapping = inode->i_mapping;
	struct page *page;
	void *fsdata;
	int err;

	err = inode_newsize_ok(inode, size);
	if (err)
		goto out;

	err = pagecache_write_begin(NULL, mapping, size, 0,
				AOP_FLAG_UNINTERRUPTIBLE|AOP_FLAG_CONT_EXPAND,
				&page, &fsdata);
	if (err)
		goto out;

	err = pagecache_write_end(NULL, mapping, size, 0, 0, page, fsdata);
	BUG_ON(err > 0);

out:
	return err;
}
EXPORT_SYMBOL(generic_cont_expand_simple);

static int cont_expand_zero(struct file *file, struct address_space *mapping,
			    loff_t pos, loff_t *bytes)
{
	struct inode *inode = mapping->host;
	unsigned blocksize = 1 << inode->i_blkbits;
	struct page *page;
	void *fsdata;
	pgoff_t index, curidx;
	loff_t curpos;
	unsigned zerofrom, offset, len;
	int err = 0;

	index = pos >> PAGE_CACHE_SHIFT;
	offset = pos & ~PAGE_CACHE_MASK;

	while (index > (curidx = (curpos = *bytes)>>PAGE_CACHE_SHIFT)) {
		zerofrom = curpos & ~PAGE_CACHE_MASK;
		if (zerofrom & (blocksize-1)) {
			*bytes |= (blocksize-1);
			(*bytes)++;
		}
		len = PAGE_CACHE_SIZE - zerofrom;

		err = pagecache_write_begin(file, mapping, curpos, len,
						AOP_FLAG_UNINTERRUPTIBLE,
						&page, &fsdata);
		if (err)
			goto out;
		zero_user(page, zerofrom, len);
		err = pagecache_write_end(file, mapping, curpos, len, len,
						page, fsdata);
		if (err < 0)
			goto out;
		BUG_ON(err != len);
		err = 0;

		balance_dirty_pages_ratelimited(mapping);
	}

	/* page covers the boundary, find the boundary offset */
	if (index == curidx) {
		zerofrom = curpos & ~PAGE_CACHE_MASK;
		/* if we will expand the thing last block will be filled */
		if (offset <= zerofrom) {
			goto out;
		}
		if (zerofrom & (blocksize-1)) {
			*bytes |= (blocksize-1);
			(*bytes)++;
		}
		len = offset - zerofrom;

		err = pagecache_write_begin(file, mapping, curpos, len,
						AOP_FLAG_UNINTERRUPTIBLE,
						&page, &fsdata);
		if (err)
			goto out;
		zero_user(page, zerofrom, len);
		err = pagecache_write_end(file, mapping, curpos, len, len,
						page, fsdata);
		if (err < 0)
			goto out;
		BUG_ON(err != len);
		err = 0;
	}
out:
	return err;
}

/*
 * For moronic filesystems that do not allow holes in file.
 * We may have to extend the file.
 */
int cont_write_begin(struct file *file, struct address_space *mapping,
			loff_t pos, unsigned len, unsigned flags,
			struct page **pagep, void **fsdata,
			get_block_t *get_block, loff_t *bytes)
{
	struct inode *inode = mapping->host;
	unsigned blocksize = 1 << inode->i_blkbits;
	unsigned zerofrom;
	int err;

	err = cont_expand_zero(file, mapping, pos, bytes);
	if (err)
		return err;

	zerofrom = *bytes & ~PAGE_CACHE_MASK;
	if (pos+len > *bytes && zerofrom & (blocksize-1)) {
		*bytes |= (blocksize-1);
		(*bytes)++;
	}

	return block_write_begin(mapping, pos, len, flags, pagep, get_block);
}
EXPORT_SYMBOL(cont_write_begin);

int block_commit_write(struct page *page, unsigned from, unsigned to)
{
	struct inode *inode = page->mapping->host;
	__block_commit_write(inode, page, from, to);
	return 0;
}
EXPORT_SYMBOL(block_commit_write);

/*
 * block_page_mkwrite() is not allowed to change the file size as it gets
 * called from a page fault handler when a page is first dirtied. Hence we must
 * be careful to check for EOF conditions here. We set the page up correctly
 * for a written page which means we get ENOSPC checking when writing into
 * holes and correct delalloc and unwritten extent mapping on filesystems that
 * support these features.
 *
 * We are not allowed to take the i_mutex here so we have to play games to
 * protect against truncate races as the page could now be beyond EOF.  Because
 * truncate writes the inode size before removing pages, once we have the
 * page lock we can determine safely if the page is beyond EOF. If it is not
 * beyond EOF, then the page is guaranteed safe against truncation until we
 * unlock the page.
 *
 * Direct callers of this function should protect against filesystem freezing
 * using sb_start_pagefault() - sb_end_pagefault() functions.
 */
int __block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
			 get_block_t get_block)
{
	struct page *page = vmf->page;
	struct inode *inode = file_inode(vma->vm_file);
	unsigned long end;
	loff_t size;
	int ret;

	lock_page(page);
	size = i_size_read(inode);
	if ((page->mapping != inode->i_mapping) ||
	    (page_offset(page) > size)) {
		/* We overload EFAULT to mean page got truncated */
		ret = -EFAULT;
		goto out_unlock;
	}

	/* page is wholly or partially inside EOF */
	if (((page->index + 1) << PAGE_CACHE_SHIFT) > size)
		end = size & ~PAGE_CACHE_MASK;
	else
		end = PAGE_CACHE_SIZE;

	ret = __block_write_begin(page, 0, end, get_block);
	if (!ret)
		ret = block_commit_write(page, 0, end);

	if (unlikely(ret < 0))
		goto out_unlock;
	set_page_dirty(page);
	wait_for_stable_page(page);
	return 0;
out_unlock:
	unlock_page(page);
	return ret;
}
EXPORT_SYMBOL(__block_page_mkwrite);

int block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
		       get_block_t get_block)
{
	int ret;
	struct super_block *sb = file_inode(vma->vm_file)->i_sb;

	sb_start_pagefault(sb);

	/*
	 * Update file times before taking page lock. We may end up failing the
	 * fault so this update may be superfluous but who really cares...
	 */
	file_update_time(vma->vm_file);

	ret = __block_page_mkwrite(vma, vmf, get_block);
	sb_end_pagefault(sb);
	return block_page_mkwrite_return(ret);
}
EXPORT_SYMBOL(block_page_mkwrite);
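
/*
 * Usage sketch (illustrative only): a filesystem exposes this through its
 * vm_operations_struct ("myfs_get_block" is hypothetical):
 *
 *	static int myfs_page_mkwrite(struct vm_area_struct *vma,
 *				     struct vm_fault *vmf)
 *	{
 *		return block_page_mkwrite(vma, vmf, myfs_get_block);
 *	}
 */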

/*
 * nobh_write_begin()'s prereads are special: the buffer_heads are freed
 * immediately, while under the page lock.  So it needs a special end_io
 * handler which does not touch the bh after unlocking it.
 */
static void end_buffer_read_nobh(struct buffer_head *bh, int uptodate)
{
	__end_buffer_read_notouch(bh, uptodate);
}

/*
 * Attach the singly-linked list of buffers created by nobh_write_begin, to
 * the page (converting it to circular linked list and taking care of page
 * dirty races).
 */
static void attach_nobh_buffers(struct page *page, struct buffer_head *head)
{
	struct buffer_head *bh;

	BUG_ON(!PageLocked(page));

	spin_lock(&page->mapping->private_lock);
	bh = head;
	do {
		if (PageDirty(page))
			set_buffer_dirty(bh);
		if (!bh->b_this_page)
			bh->b_this_page = head;
		bh = bh->b_this_page;
	} while (bh != head);
	attach_page_buffers(page, head);
	spin_unlock(&page->mapping->private_lock);
}

/*
 * On entry, the page is fully not uptodate.
 * On exit the page is fully uptodate in the areas outside (from,to)
 * The filesystem needs to handle block truncation upon failure.
 */
int nobh_write_begin(struct address_space *mapping,
			loff_t pos, unsigned len, unsigned flags,
			struct page **pagep, void **fsdata,
			get_block_t *get_block)
{
	struct inode *inode = mapping->host;
	const unsigned blkbits = inode->i_blkbits;
	const unsigned blocksize = 1 << blkbits;
	struct buffer_head *head, *bh;
	struct page *page;
	pgoff_t index;
	unsigned from, to;
	unsigned block_in_page;
	unsigned block_start, block_end;
	sector_t block_in_file;
	int nr_reads = 0;
	int ret = 0;
	int is_mapped_to_disk = 1;

	index = pos >> PAGE_CACHE_SHIFT;
	from = pos & (PAGE_CACHE_SIZE - 1);
	to = from + len;

	page = grab_cache_page_write_begin(mapping, index, flags);
	if (!page)
		return -ENOMEM;
	*pagep = page;
	*fsdata = NULL;

	if (page_has_buffers(page)) {
		ret = __block_write_begin(page, pos, len, get_block);
		if (unlikely(ret))
			goto out_release;
		return ret;
	}

	if (PageMappedToDisk(page))
		return 0;

	/*
	 * Allocate buffers so that we can keep track of state, and potentially
	 * attach them to the page if an error occurs. In the common case of
	 * no error, they will just be freed again without ever being attached
	 * to the page (which is all OK, because we're under the page lock).
	 *
	 * Be careful: the buffer linked list is a NULL terminated one, rather
	 * than the circular one we're used to.
	 */
	head = alloc_page_buffers(page, blocksize, 0);
	if (!head) {
		ret = -ENOMEM;
		goto out_release;
	}

	block_in_file = (sector_t)page->index << (PAGE_CACHE_SHIFT - blkbits);

	/*
	 * We loop across all blocks in the page, whether or not they are
	 * part of the affected region.  This is so we can discover if the
	 * page is fully mapped-to-disk.
	 */
	for (block_start = 0, block_in_page = 0, bh = head;
		  block_start < PAGE_CACHE_SIZE;
		  block_in_page++, block_start += blocksize, bh = bh->b_this_page) {
		int create;

		block_end = block_start + blocksize;
		bh->b_state = 0;
		create = 1;
		if (block_start >= to)
			create = 0;
		ret = get_block(inode, block_in_file + block_in_page,
					bh, create);
		if (ret)
			goto failed;
		if (!buffer_mapped(bh))
			is_mapped_to_disk = 0;
		if (buffer_new(bh))
			unmap_underlying_metadata(bh->b_bdev, bh->b_blocknr);
		if (PageUptodate(page)) {
			set_buffer_uptodate(bh);
			continue;
		}
		if (buffer_new(bh) || !buffer_mapped(bh)) {
			zero_user_segments(page, block_start, from,
							to, block_end);
			continue;
		}
		if (buffer_uptodate(bh))
			continue;
		if (block_start < from || block_end > to) {
			lock_buffer(bh);
			bh->b_end_io = end_buffer_read_nobh;
			submit_bh(READ, bh);
			nr_reads++;
		}
	}

	if (nr_reads) {
		/*
		 * The page is locked, so these buffers are protected from
		 * any VM or truncate activity.  Otherwise we don't need to
		 * have the buffer locked while it is on the list.
		 */
		for (bh = head; bh; bh = bh->b_this_page) {
			wait_on_buffer(bh);
			if (!buffer_uptodate(bh))
				ret = -EIO;
		}
		if (ret)
			goto failed;
	}

	if (is_mapped_to_disk)
		SetPageMappedToDisk(page);

	*fsdata = head; /* to be released by nobh_write_end */

	return 0;

failed:
	BUG_ON(!ret);
	/*
	 * Error recovery is a bit difficult. We need to zero out blocks that
	 * were newly allocated, and dirty them to ensure they get written out.
	 * Buffers need to be attached to the page at this point, otherwise
	 * the handling of potential IO errors during writeout would be hard
	 * (could try doing synchronous writeout, but what if that fails too?)
	 */
	attach_nobh_buffers(page, head);
	page_zero_new_buffers(page, from, to);

out_release:
	unlock_page(page);
	page_cache_release(page);
	*pagep = NULL;

	return ret;
}
EXPORT_SYMBOL(nobh_write_begin);

int nobh_write_end(struct file *file, struct address_space *mapping,
			loff_t pos, unsigned len, unsigned copied,
			struct page *page, void *fsdata)
{
	struct inode *inode = page->mapping->host;
	struct buffer_head *head = fsdata;
	struct buffer_head *bh;
	BUG_ON(fsdata != NULL && page_has_buffers(page));

	if (unlikely(copied < len) && head)
		attach_nobh_buffers(page, head);
	if (page_has_buffers(page))
		return generic_write_end(file, mapping, pos, len,
					copied, page, fsdata);

	SetPageUptodate(page);
	set_page_dirty(page);
	if (pos+copied > inode->i_size) {
		i_size_write(inode, pos+copied);
		mark_inode_dirty(inode);
	}

	unlock_page(page);
	page_cache_release(page);

	while (head) {
		bh = head;
		head = head->b_this_page;
		free_buffer_head(bh);
	}

	return copied;
}
EXPORT_SYMBOL(nobh_write_end);

/*
 * nobh_writepage() - based on block_write_full_page() except
 * that it tries to operate without attaching bufferheads to
 * the page.
 */
int nobh_writepage(struct page *page, get_block_t *get_block,
			struct writeback_control *wbc)
{
	struct inode * const inode = page->mapping->host;
	loff_t i_size = i_size_read(inode);
	const pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT;
	unsigned offset;
	int ret;

	/* Is the page fully inside i_size? */
	if (page->index < end_index)
		goto out;

	/* Is the page fully outside i_size? (truncate in progress) */
	offset = i_size & (PAGE_CACHE_SIZE-1);
	if (page->index >= end_index+1 || !offset) {
		/*
		 * The page may have dirty, unmapped buffers.  For example,
		 * they may have been added in ext3_writepage().  Make them
		 * freeable here, so the page does not leak.
		 */
#if 0
		/* Not really sure about this  - do we need this ? */
		if (page->mapping->a_ops->invalidatepage)
			page->mapping->a_ops->invalidatepage(page, offset);
#endif
		unlock_page(page);
		return 0; /* don't care */
	}

	/*
	 * The page straddles i_size.  It must be zeroed out on each and every
	 * writepage invocation because it may be mmapped.  "A file is mapped
	 * in multiples of the page size.  For a file that is not a multiple of
	 * the page size, the remaining memory is zeroed when mapped, and
	 * writes to that region are not written out to the file."
	 */
	zero_user_segment(page, offset, PAGE_CACHE_SIZE);
out:
	ret = mpage_writepage(page, get_block, wbc);
	if (ret == -EAGAIN)
		ret = __block_write_full_page(inode, page, get_block, wbc,
					      end_buffer_async_write);
	return ret;
}
EXPORT_SYMBOL(nobh_writepage);

int nobh_truncate_page(struct address_space *mapping,
			loff_t from, get_block_t *get_block)
{
	pgoff_t index = from >> PAGE_CACHE_SHIFT;
	unsigned offset = from & (PAGE_CACHE_SIZE-1);
	unsigned blocksize;
	sector_t iblock;
	unsigned length, pos;
	struct inode *inode = mapping->host;
	struct page *page;
	struct buffer_head map_bh;
	int err;

	blocksize = 1 << inode->i_blkbits;
	length = offset & (blocksize - 1);

	/* Block boundary? Nothing to do */
	if (!length)
		return 0;

	length = blocksize - length;
	iblock = (sector_t)index << (PAGE_CACHE_SHIFT - inode->i_blkbits);

	page = grab_cache_page(mapping, index);
	err = -ENOMEM;
	if (!page)
		goto out;

	if (page_has_buffers(page)) {
has_buffers:
		unlock_page(page);
		page_cache_release(page);
		return block_truncate_page(mapping, from, get_block);
	}

	/* Find the buffer that contains "offset" */
	pos = blocksize;
	while (offset >= pos) {
		iblock++;
		pos += blocksize;
	}

	map_bh.b_size = blocksize;
	map_bh.b_state = 0;
	err = get_block(inode, iblock, &map_bh, 0);
	if (err)
		goto unlock;
	/* unmapped? It's a hole - nothing to do */
	if (!buffer_mapped(&map_bh))
		goto unlock;

	/* Ok, it's mapped. Make sure it's up-to-date */
	if (!PageUptodate(page)) {
		err = mapping->a_ops->readpage(NULL, page);
		if (err) {
			page_cache_release(page);
			goto out;
		}
		lock_page(page);
		if (!PageUptodate(page)) {
			err = -EIO;
			goto unlock;
		}
		if (page_has_buffers(page))
			goto has_buffers;
	}
	zero_user(page, offset, length);
	set_page_dirty(page);
	err = 0;

unlock:
	unlock_page(page);
	page_cache_release(page);
out:
	return err;
}
EXPORT_SYMBOL(nobh_truncate_page);

int block_truncate_page(struct address_space *mapping,
			loff_t from, get_block_t *get_block)
{
	pgoff_t index = from >> PAGE_CACHE_SHIFT;
	unsigned offset = from & (PAGE_CACHE_SIZE-1);
	unsigned blocksize;
	sector_t iblock;
	unsigned length, pos;
	struct inode *inode = mapping->host;
	struct page *page;
	struct buffer_head *bh;
	int err;

	blocksize = 1 << inode->i_blkbits;
	length = offset & (blocksize - 1);

	/* Block boundary? Nothing to do */
	if (!length)
		return 0;

	length = blocksize - length;
	iblock = (sector_t)index << (PAGE_CACHE_SHIFT - inode->i_blkbits);

	page = grab_cache_page(mapping, index);
	err = -ENOMEM;
	if (!page)
		goto out;

	if (!page_has_buffers(page))
		create_empty_buffers(page, blocksize, 0);

	/* Find the buffer that contains "offset" */
	bh = page_buffers(page);
	pos = blocksize;
	while (offset >= pos) {
		bh = bh->b_this_page;
		iblock++;
		pos += blocksize;
	}

	err = 0;
	if (!buffer_mapped(bh)) {
		WARN_ON(bh->b_size != blocksize);
		err = get_block(inode, iblock, bh, 0);
		if (err)
			goto unlock;
		/* unmapped? It's a hole - nothing to do */
		if (!buffer_mapped(bh))
			goto unlock;
	}

	/* Ok, it's mapped. Make sure it's up-to-date */
	if (PageUptodate(page))
		set_buffer_uptodate(bh);

	if (!buffer_uptodate(bh) && !buffer_delay(bh) && !buffer_unwritten(bh)) {
		err = -EIO;
		ll_rw_block(READ, 1, &bh);
		wait_on_buffer(bh);
		/* Uhhuh. Read error. Complain and punt. */
		if (!buffer_uptodate(bh))
			goto unlock;
	}

	zero_user(page, offset, length);
	mark_buffer_dirty(bh);
	err = 0;

unlock:
	unlock_page(page);
	page_cache_release(page);
out:
	return err;
}
EXPORT_SYMBOL(block_truncate_page);

/*
 * The generic ->writepage implementation for buffer-backed address_spaces,
 * taking an explicit buffer end_io handler.
 */
int block_write_full_page_endio(struct page *page, get_block_t *get_block,
			struct writeback_control *wbc, bh_end_io_t *handler)
{
	struct inode * const inode = page->mapping->host;
	loff_t i_size = i_size_read(inode);
	const pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT;
	unsigned offset;

	/* Is the page fully inside i_size? */
	if (page->index < end_index)
		return __block_write_full_page(inode, page, get_block, wbc,
					       handler);

	/* Is the page fully outside i_size? (truncate in progress) */
	offset = i_size & (PAGE_CACHE_SIZE-1);
	if (page->index >= end_index+1 || !offset) {
		/*
		 * The page may have dirty, unmapped buffers.  For example,
		 * they may have been added in ext3_writepage().  Make them
		 * freeable here, so the page does not leak.
		 */
		do_invalidatepage(page, 0, PAGE_CACHE_SIZE);
		unlock_page(page);
		return 0; /* don't care */
	}

	/*
	 * The page straddles i_size.  It must be zeroed out on each and every
	 * writepage invocation because it may be mmapped.  "A file is mapped
	 * in multiples of the page size.  For a file that is not a multiple of
	 * the page size, the remaining memory is zeroed when mapped, and
	 * writes to that region are not written out to the file."
	 */
	zero_user_segment(page, offset, PAGE_CACHE_SIZE);
	return __block_write_full_page(inode, page, get_block, wbc, handler);
}
EXPORT_SYMBOL(block_write_full_page_endio);

/*
 * The generic ->writepage function for buffer-backed address_spaces,
 * using the standard end_buffer_async_write completion handler.
 */
int block_write_full_page(struct page *page, get_block_t *get_block,
			struct writeback_control *wbc)
{
	return block_write_full_page_endio(page, get_block, wbc,
					   end_buffer_async_write);
}
EXPORT_SYMBOL(block_write_full_page);
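
/*
 * Illustrative sketch: a simple filesystem's ->writepage is typically a
 * thin wrapper around block_write_full_page().  "myfs_get_block" is a
 * hypothetical get_block_t callback:
 *
 *	static int myfs_writepage(struct page *page,
 *				  struct writeback_control *wbc)
 *	{
 *		return block_write_full_page(page, myfs_get_block, wbc);
 *	}
 */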

/*
 * Map a file block to a device block for FIBMAP-style queries, using a
 * read-only get_block() lookup into a temporary on-stack buffer_head.
 * Returns 0 (b_blocknr unchanged) for unmapped blocks.
 */
sector_t generic_block_bmap(struct address_space *mapping, sector_t block,
			    get_block_t *get_block)
{
	struct buffer_head tmp;
	struct inode *inode = mapping->host;
	tmp.b_state = 0;
	tmp.b_blocknr = 0;
	tmp.b_size = 1 << inode->i_blkbits;
	get_block(inode, block, &tmp, 0);
	return tmp.b_blocknr;
}
EXPORT_SYMBOL(generic_block_bmap);
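
/*
 * Illustrative sketch: wiring generic_block_bmap() up as an
 * address_space_operations ->bmap method (hypothetical "myfs" and
 * "myfs_get_block"):
 *
 *	static sector_t myfs_bmap(struct address_space *mapping,
 *				  sector_t block)
 *	{
 *		return generic_block_bmap(mapping, block, myfs_get_block);
 *	}
 */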

/*
 * I/O completion handler for submit_bh(): propagate the bio's status
 * bits to the buffer_head and hand off to bh->b_end_io.
 */
static void end_bio_bh_io_sync(struct bio *bio, int err)
{
	struct buffer_head *bh = bio->bi_private;

	if (err == -EOPNOTSUPP)
		set_bit(BIO_EOPNOTSUPP, &bio->bi_flags);

	if (unlikely(test_bit(BIO_QUIET, &bio->bi_flags)))
		set_bit(BH_Quiet, &bh->b_state);

	bh->b_end_io(bh, test_bit(BIO_UPTODATE, &bio->bi_flags));
	bio_put(bio);
}

/*
 * This allows us to do IO even on the odd last sectors of a device, even
 * if the block size is some multiple of the physical sector size.
 *
 * We'll just truncate the bio to the size of the device, and clear the
 * end of the buffer head manually.
 *
 * Truly out-of-range accesses will turn into actual I/O errors; this only
 * handles the "we need to be able to do I/O at the final sector" case.
 */
static void guard_bh_eod(int rw, struct bio *bio, struct buffer_head *bh)
{
	sector_t maxsector;
	unsigned bytes;

	maxsector = i_size_read(bio->bi_bdev->bd_inode) >> 9;
	if (!maxsector)
		return;

	/*
	 * If the *whole* IO is past the end of the device, let it through;
	 * it will fail with an I/O error further down the stack.
	 */
	if (unlikely(bio->bi_sector >= maxsector))
		return;

	maxsector -= bio->bi_sector;
	bytes = bio->bi_size;
	if (likely((bytes >> 9) <= maxsector))
		return;

	/* Uhhuh. We've got a bh that straddles the device size! */
	bytes = maxsector << 9;

	/* Truncate the bio.. */
	bio->bi_size = bytes;
	bio->bi_io_vec[0].bv_len = bytes;

	/* ..and clear the end of the buffer for reads */
	if ((rw & RW_MASK) == READ) {
		void *kaddr = kmap_atomic(bh->b_page);
		memset(kaddr + bh_offset(bh) + bytes, 0, bh->b_size - bytes);
		kunmap_atomic(kaddr);
		flush_dcache_page(bh->b_page);
	}
}

int _submit_bh(int rw, struct buffer_head *bh, unsigned long bio_flags)
{
	struct bio *bio;
	int ret = 0;

	BUG_ON(!buffer_locked(bh));
	BUG_ON(!buffer_mapped(bh));
	BUG_ON(!bh->b_end_io);
	BUG_ON(buffer_delay(bh));
	BUG_ON(buffer_unwritten(bh));

	/*
	 * Only clear out a write error when rewriting
	 */
	if (test_set_buffer_req(bh) && (rw & WRITE))
		clear_buffer_write_io_error(bh);

	/*
	 * from here on down, it's all bio -- do the initial mapping,
	 * submit_bio -> generic_make_request may further map this bio around
	 */
	bio = bio_alloc(GFP_NOIO, 1);

	bio->bi_sector = bh->b_blocknr * (bh->b_size >> 9);
	bio->bi_bdev = bh->b_bdev;
	bio->bi_io_vec[0].bv_page = bh->b_page;
	bio->bi_io_vec[0].bv_len = bh->b_size;
	bio->bi_io_vec[0].bv_offset = bh_offset(bh);

	bio->bi_vcnt = 1;
	bio->bi_size = bh->b_size;

	bio->bi_end_io = end_bio_bh_io_sync;
	bio->bi_private = bh;
	bio->bi_flags |= bio_flags;

	/* Take care of bh's that straddle the end of the device */
	guard_bh_eod(rw, bio, bh);

	if (buffer_meta(bh))
		rw |= REQ_META;
	if (buffer_prio(bh))
		rw |= REQ_PRIO;

	bio_get(bio);
	submit_bio(rw, bio);

	if (bio_flagged(bio, BIO_EOPNOTSUPP))
		ret = -EOPNOTSUPP;

	bio_put(bio);
	return ret;
}
EXPORT_SYMBOL_GPL(_submit_bh);

int submit_bh(int rw, struct buffer_head *bh)
{
	return _submit_bh(rw, bh, 0);
}
EXPORT_SYMBOL(submit_bh);
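
/*
 * Illustrative sketch (not part of this file's API): reading one block
 * synchronously with submit_bh().  This is essentially what
 * bh_submit_read() below does; "bdev", "block" and "size" are
 * caller-supplied:
 *
 *	struct buffer_head *bh = __getblk(bdev, block, size);
 *
 *	if (bh && !buffer_uptodate(bh)) {
 *		lock_buffer(bh);
 *		if (!buffer_uptodate(bh)) {
 *			get_bh(bh);
 *			bh->b_end_io = end_buffer_read_sync;
 *			submit_bh(READ, bh);
 *			wait_on_buffer(bh);
 *			if (!buffer_uptodate(bh))
 *				... handle -EIO ...
 *		} else
 *			unlock_buffer(bh);
 *	}
 */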

/**
 * ll_rw_block - low-level access to block devices (DEPRECATED)
 * @rw: whether to %READ or %WRITE or maybe %READA (readahead)
 * @nr: number of &struct buffer_heads in the array
 * @bhs: array of pointers to &struct buffer_head
 *
 * ll_rw_block() takes an array of pointers to &struct buffer_heads, and
 * requests an I/O operation on them, either a %READ or a %WRITE.
 *
 * This function drops any buffer that it cannot get a lock on (with the
 * BH_Lock state bit), any buffer that appears to be clean when doing a
 * write request, and any buffer that appears to be up-to-date when doing
 * a read request.
 *
 * ll_rw_block() sets b_end_io to a simple completion handler that marks
 * the buffer up-to-date (if appropriate), unlocks the buffer and wakes
 * any waiters.  Callers that need to know whether the I/O succeeded must
 * wait on each buffer themselves and check its uptodate state afterwards.
 */
void ll_rw_block(int rw, int nr, struct buffer_head *bhs[])
{
	int i;

	for (i = 0; i < nr; i++) {
		struct buffer_head *bh = bhs[i];

		if (!trylock_buffer(bh))
			continue;
		if (rw == WRITE) {
			if (test_clear_buffer_dirty(bh)) {
				bh->b_end_io = end_buffer_write_sync;
				get_bh(bh);
				submit_bh(WRITE, bh);
				continue;
			}
		} else {
			if (!buffer_uptodate(bh)) {
				bh->b_end_io = end_buffer_read_sync;
				get_bh(bh);
				submit_bh(rw, bh);
				continue;
			}
		}
		unlock_buffer(bh);
	}
}
EXPORT_SYMBOL(ll_rw_block);
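
/*
 * Illustrative sketch: a filesystem can batch several metadata reads and
 * then wait for the ones it actually needs ("bhs", "nr" and the error
 * label are caller state):
 *
 *	ll_rw_block(READ, nr, bhs);
 *	for (i = 0; i < nr; i++) {
 *		wait_on_buffer(bhs[i]);
 *		if (!buffer_uptodate(bhs[i]))
 *			goto read_error;
 *	}
 */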

/*
 * Write out a dirty buffer asynchronously: clear the dirty bit, take a
 * reference and submit the write.  Clean buffers are simply unlocked.
 */
void write_dirty_buffer(struct buffer_head *bh, int rw)
{
	lock_buffer(bh);
	if (!test_clear_buffer_dirty(bh)) {
		unlock_buffer(bh);
		return;
	}
	bh->b_end_io = end_buffer_write_sync;
	get_bh(bh);
	submit_bh(rw, bh);
}
EXPORT_SYMBOL(write_dirty_buffer);

/*
 * For a data-integrity writeout, we need to wait upon any in-progress I/O
 * and then start new I/O and then wait upon it.  The caller must have a
 * ref on the buffer_head.
 */
int __sync_dirty_buffer(struct buffer_head *bh, int rw)
{
	int ret = 0;

	WARN_ON(atomic_read(&bh->b_count) < 1);
	lock_buffer(bh);
	if (test_clear_buffer_dirty(bh)) {
		get_bh(bh);
		bh->b_end_io = end_buffer_write_sync;
		ret = submit_bh(rw, bh);
		wait_on_buffer(bh);
		if (!ret && !buffer_uptodate(bh))
			ret = -EIO;
	} else {
		unlock_buffer(bh);
	}
	return ret;
}
EXPORT_SYMBOL(__sync_dirty_buffer);

int sync_dirty_buffer(struct buffer_head *bh)
{
	return __sync_dirty_buffer(bh, WRITE_SYNC);
}
EXPORT_SYMBOL(sync_dirty_buffer);
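
/*
 * Illustrative sketch: synchronously updating an on-disk metadata block
 * (e.g. a superblock).  "bh" holds the block and the caller already owns
 * a reference to it, as __sync_dirty_buffer() requires:
 *
 *	... modify bh->b_data ...
 *	mark_buffer_dirty(bh);
 *	if (sync_dirty_buffer(bh))
 *		... the write failed; handle the I/O error ...
 */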

/*
 * try_to_free_buffers() checks if all the buffers on this particular page
 * are unused, and releases them if so.
 *
 * Exclusion against try_to_free_buffers may be obtained by either locking
 * the page or by holding its mapping's private_lock.
 *
 * If the page is dirty but all the buffers are clean then we need to be
 * sure to mark the page clean as well.  This is because the page may be
 * against a block device, and a later reattachment of buffers to a dirty
 * page will set *all* buffers dirty.  So a buffer may be clean at the
 * moment, be marked dirty, be written out ... etc ...
 *
 * The same applies to regular filesystem pages: if all the buffers are
 * clean then we set the page clean and proceed.  To do that, we require
 * exclusion against try_to_free_buffers() itself.
 */
static inline int buffer_busy(struct buffer_head *bh)
{
	return atomic_read(&bh->b_count) |
		(bh->b_state & ((1 << BH_Dirty) | (1 << BH_Lock)));
}

static int
drop_buffers(struct page *page, struct buffer_head **buffers_to_free)
{
	struct buffer_head *head = page_buffers(page);
	struct buffer_head *bh;

	bh = head;
	do {
		if (buffer_write_io_error(bh) && page->mapping)
			set_bit(AS_EIO, &page->mapping->flags);
		if (buffer_busy(bh))
			goto failed;
		bh = bh->b_this_page;
	} while (bh != head);

	do {
		struct buffer_head *next = bh->b_this_page;

		if (bh->b_assoc_map)
			__remove_assoc_queue(bh);
		bh = next;
	} while (bh != head);
	*buffers_to_free = head;
	__clear_page_buffers(page);
	return 1;
failed:
	return 0;
}

int try_to_free_buffers(struct page *page)
{
	struct address_space * const mapping = page->mapping;
	struct buffer_head *buffers_to_free = NULL;
	int ret = 0;

	BUG_ON(!PageLocked(page));
	if (PageWriteback(page))
		return 0;

	if (mapping == NULL) {		/* can this still happen? */
		ret = drop_buffers(page, &buffers_to_free);
		goto out;
	}

	spin_lock(&mapping->private_lock);
	ret = drop_buffers(page, &buffers_to_free);

	/*
	 * If the filesystem writes its buffers by hand (eg ext3) then we
	 * can have clean buffers against a dirty page.  We clean the page
	 * here; otherwise the VM reclaim logic thinks that the page is
	 * dirty and will never write it back.
	 *
	 * Also, during truncate, discard_buffer will have marked all the
	 * page's buffers clean.  We discover that here and clean the page
	 * also.
	 *
	 * private_lock must be held over this entire operation in order to
	 * synchronise against __set_page_dirty_buffers and prevent the
	 * dirty bit from being lost.
	 */
	if (ret)
		cancel_dirty_page(page, PAGE_CACHE_SIZE);
	spin_unlock(&mapping->private_lock);
out:
	if (buffers_to_free) {
		struct buffer_head *bh = buffers_to_free;

		do {
			struct buffer_head *next = bh->b_this_page;
			free_buffer_head(bh);
			bh = next;
		} while (bh != buffers_to_free);
	}
	return ret;
}
EXPORT_SYMBOL(try_to_free_buffers);
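
/*
 * Illustrative sketch: a buffer-backed filesystem with no journalling
 * requirements can implement ->releasepage by deferring straight to
 * try_to_free_buffers() (hypothetical "myfs"):
 *
 *	static int myfs_releasepage(struct page *page, gfp_t gfp)
 *	{
 *		return try_to_free_buffers(page);
 *	}
 */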

/*
 * There are no bdflush tunables left.  But distributions are still
 * running obsolete flush daemons, so we terminate them here.
 *
 * Use of bdflush() is deprecated and will be removed in a future kernel.
 * The `flush-X' kernel threads fully replace bdflush daemons and this
 * call.
 */
SYSCALL_DEFINE2(bdflush, int, func, long, data)
{
	static int msg_count;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	if (msg_count < 5) {
		msg_count++;
		printk(KERN_INFO
			"warning: process `%s' used the obsolete bdflush"
			" system call\n", current->comm);
		printk(KERN_INFO "Fix your initscripts?\n");
	}

	if (func == 1)
		do_exit(0);
	return 0;
}

/*
 * Buffer-head allocation
 */
static struct kmem_cache *bh_cachep __read_mostly;

/*
 * Once the number of bh's in the machine exceeds this level, we start
 * stripping them in writeback.
 */
static unsigned long max_buffer_heads;

int buffer_heads_over_limit;

struct bh_accounting {
	int nr;			/* Number of live bh's */
	int ratelimit;		/* Limit cacheline bouncing */
};

static DEFINE_PER_CPU(struct bh_accounting, bh_accounting) = {0, 0};

static void recalc_bh_state(void)
{
	int i;
	int tot = 0;

	if (__this_cpu_inc_return(bh_accounting.ratelimit) - 1 < 4096)
		return;
	__this_cpu_write(bh_accounting.ratelimit, 0);
	for_each_online_cpu(i)
		tot += per_cpu(bh_accounting, i).nr;
	buffer_heads_over_limit = (tot > max_buffer_heads);
}

struct buffer_head *alloc_buffer_head(gfp_t gfp_flags)
{
	struct buffer_head *ret = kmem_cache_zalloc(bh_cachep, gfp_flags);
	if (ret) {
		INIT_LIST_HEAD(&ret->b_assoc_buffers);
		preempt_disable();
		__this_cpu_inc(bh_accounting.nr);
		recalc_bh_state();
		preempt_enable();
	}
	return ret;
}
EXPORT_SYMBOL(alloc_buffer_head);

void free_buffer_head(struct buffer_head *bh)
{
	BUG_ON(!list_empty(&bh->b_assoc_buffers));
	kmem_cache_free(bh_cachep, bh);
	preempt_disable();
	__this_cpu_dec(bh_accounting.nr);
	recalc_bh_state();
	preempt_enable();
}
EXPORT_SYMBOL(free_buffer_head);

/*
 * When a CPU is taken offline, release its per-cpu buffer_head LRU
 * references and fold its buffer_head count into the current CPU's
 * accounting.
 */
static void buffer_exit_cpu(int cpu)
{
	int i;
	struct bh_lru *b = &per_cpu(bh_lrus, cpu);

	for (i = 0; i < BH_LRU_SIZE; i++) {
		brelse(b->bhs[i]);
		b->bhs[i] = NULL;
	}
	this_cpu_add(bh_accounting.nr, per_cpu(bh_accounting, cpu).nr);
	per_cpu(bh_accounting, cpu).nr = 0;
}

static int buffer_cpu_notify(struct notifier_block *self,
			     unsigned long action, void *hcpu)
{
	if (action == CPU_DEAD || action == CPU_DEAD_FROZEN)
		buffer_exit_cpu((unsigned long)hcpu);
	return NOTIFY_OK;
}

/**
 * bh_uptodate_or_lock - Test whether the buffer is uptodate
 * @bh: struct buffer_head
 *
 * Return true if the buffer is up-to-date and false,
 * with the buffer locked, if not.
 */
int bh_uptodate_or_lock(struct buffer_head *bh)
{
	if (!buffer_uptodate(bh)) {
		lock_buffer(bh);
		if (!buffer_uptodate(bh))
			return 0;
		unlock_buffer(bh);
	}
	return 1;
}
EXPORT_SYMBOL(bh_uptodate_or_lock);

/**
 * bh_submit_read - Submit a locked buffer for reading
 * @bh: struct buffer_head
 *
 * Returns zero on success and -EIO on error.
 */
int bh_submit_read(struct buffer_head *bh)
{
	BUG_ON(!buffer_locked(bh));

	if (buffer_uptodate(bh)) {
		unlock_buffer(bh);
		return 0;
	}

	get_bh(bh);
	bh->b_end_io = end_buffer_read_sync;
	submit_bh(READ, bh);
	wait_on_buffer(bh);
	if (buffer_uptodate(bh))
		return 0;
	return -EIO;
}
EXPORT_SYMBOL(bh_submit_read);
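
/*
 * Illustrative sketch: the two helpers above are designed to be used
 * together, so that callers skip I/O entirely when the buffer is already
 * up-to-date and only submit a read (on the now-locked buffer) when it
 * is not:
 *
 *	if (!bh_uptodate_or_lock(bh)) {
 *		if (bh_submit_read(bh))
 *			... handle -EIO ...
 *	}
 */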

void __init buffer_init(void)
{
	unsigned long nrpages;

	bh_cachep = kmem_cache_create("buffer_head",
			sizeof(struct buffer_head), 0,
				(SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|
				SLAB_MEM_SPREAD),
				NULL);

	/*
	 * Limit the bh occupancy to 10% of ZONE_NORMAL
	 */
	nrpages = (nr_free_buffer_pages() * 10) / 100;
	max_buffer_heads = nrpages * (PAGE_SIZE / sizeof(struct buffer_head));
	hotcpu_notifier(buffer_cpu_notify, 0);
}