/*
 *  linux/fs/buffer.c
 *
 *  Copyright (C) 1991, 1992, 2002  Linus Torvalds
 */
21#include <linux/kernel.h>
22#include <linux/syscalls.h>
23#include <linux/fs.h>
24#include <linux/mm.h>
25#include <linux/percpu.h>
26#include <linux/slab.h>
27#include <linux/capability.h>
28#include <linux/blkdev.h>
29#include <linux/file.h>
30#include <linux/quotaops.h>
31#include <linux/highmem.h>
32#include <linux/export.h>
33#include <linux/writeback.h>
34#include <linux/hash.h>
35#include <linux/suspend.h>
36#include <linux/buffer_head.h>
37#include <linux/task_io_accounting_ops.h>
38#include <linux/bio.h>
39#include <linux/notifier.h>
40#include <linux/cpu.h>
41#include <linux/bitops.h>
42#include <linux/mpage.h>
43#include <linux/bit_spinlock.h>
44#include <trace/events/block.h>
45
46static int fsync_buffers_list(spinlock_t *lock, struct list_head *list);
47
48#define BH_ENTRY(list) list_entry((list), struct buffer_head, b_assoc_buffers)
49
50void init_buffer(struct buffer_head *bh, bh_end_io_t *handler, void *private)
51{
52 bh->b_end_io = handler;
53 bh->b_private = private;
54}
55EXPORT_SYMBOL(init_buffer);
56
57inline void touch_buffer(struct buffer_head *bh)
58{
59 trace_block_touch_buffer(bh);
60 mark_page_accessed(bh->b_page);
61}
62EXPORT_SYMBOL(touch_buffer);
63
64void __lock_buffer(struct buffer_head *bh)
65{
66 wait_on_bit_lock_io(&bh->b_state, BH_Lock, TASK_UNINTERRUPTIBLE);
67}
68EXPORT_SYMBOL(__lock_buffer);
69
70void unlock_buffer(struct buffer_head *bh)
71{
72 clear_bit_unlock(BH_Lock, &bh->b_state);
73 smp_mb__after_atomic();
74 wake_up_bit(&bh->b_state, BH_Lock);
75}
76EXPORT_SYMBOL(unlock_buffer);
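/*
 * Report whether the page has dirty or writeback buffers.  A buffer that
 * is locked counts as under writeback, as does the page's own writeback
 * flag.  Caller must hold the page lock.
 */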
83void buffer_check_dirty_writeback(struct page *page,
84 bool *dirty, bool *writeback)
85{
86 struct buffer_head *head, *bh;
87 *dirty = false;
88 *writeback = false;
89
90 BUG_ON(!PageLocked(page));
91
92 if (!page_has_buffers(page))
93 return;
94
95 if (PageWriteback(page))
96 *writeback = true;
97
98 head = page_buffers(page);
99 bh = head;
100 do {
101 if (buffer_locked(bh))
102 *writeback = true;
103
104 if (buffer_dirty(bh))
105 *dirty = true;
106
107 bh = bh->b_this_page;
108 } while (bh != head);
109}
110EXPORT_SYMBOL(buffer_check_dirty_writeback);
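/*
 * Block until a buffer comes unlocked.  This doesn't stop it from being
 * re-locked; the caller must hold a reference to the buffer_head to keep
 * it from being freed while we sleep.
 */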
117void __wait_on_buffer(struct buffer_head * bh)
118{
119 wait_on_bit_io(&bh->b_state, BH_Lock, TASK_UNINTERRUPTIBLE);
120}
121EXPORT_SYMBOL(__wait_on_buffer);
122
123static void
124__clear_page_buffers(struct page *page)
125{
126 ClearPagePrivate(page);
127 set_page_private(page, 0);
128 page_cache_release(page);
129}
130
131
132static int quiet_error(struct buffer_head *bh)
133{
134 if (!test_bit(BH_Quiet, &bh->b_state) && printk_ratelimit())
135 return 0;
136 return 1;
137}
138
139
140static void buffer_io_error(struct buffer_head *bh)
141{
142 char b[BDEVNAME_SIZE];
143 printk(KERN_ERR "Buffer I/O error on device %s, logical block %Lu\n",
144 bdevname(bh->b_bdev, b),
145 (unsigned long long)bh->b_blocknr);
146}
147
148
149
150
151
152
153
154
155
156static void __end_buffer_read_notouch(struct buffer_head *bh, int uptodate)
157{
158 if (uptodate) {
159 set_buffer_uptodate(bh);
160 } else {
		/* This happens, due to failed READA attempts. */
162 clear_buffer_uptodate(bh);
163 }
164 unlock_buffer(bh);
165}
166
167
168
169
170
171void end_buffer_read_sync(struct buffer_head *bh, int uptodate)
172{
173 __end_buffer_read_notouch(bh, uptodate);
174 put_bh(bh);
175}
176EXPORT_SYMBOL(end_buffer_read_sync);
177
178void end_buffer_write_sync(struct buffer_head *bh, int uptodate)
179{
180 char b[BDEVNAME_SIZE];
181
182 if (uptodate) {
183 set_buffer_uptodate(bh);
184 } else {
185 if (!quiet_error(bh)) {
186 buffer_io_error(bh);
187 printk(KERN_WARNING "lost page write due to "
188 "I/O error on %s\n",
189 bdevname(bh->b_bdev, b));
190 }
191 set_buffer_write_io_error(bh);
192 clear_buffer_uptodate(bh);
193 }
194 unlock_buffer(bh);
195 put_bh(bh);
196}
197EXPORT_SYMBOL(end_buffer_write_sync);
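/*
 * Slow path of __find_get_block(): look the block up in the block
 * device's page cache and walk the buffer ring attached to the page.
 * The walk is done under the mapping's private_lock, which excludes
 * try_to_free_buffers(), so this never needs the page lock and never
 * blocks.  If the block is not found even though every buffer on the
 * page is mapped, something is inconsistent and we print debug state.
 */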
210static struct buffer_head *
211__find_get_block_slow(struct block_device *bdev, sector_t block)
212{
213 struct inode *bd_inode = bdev->bd_inode;
214 struct address_space *bd_mapping = bd_inode->i_mapping;
215 struct buffer_head *ret = NULL;
216 pgoff_t index;
217 struct buffer_head *bh;
218 struct buffer_head *head;
219 struct page *page;
220 int all_mapped = 1;
221
222 index = block >> (PAGE_CACHE_SHIFT - bd_inode->i_blkbits);
223 page = find_get_page_flags(bd_mapping, index, FGP_ACCESSED);
224 if (!page)
225 goto out;
226
227 spin_lock(&bd_mapping->private_lock);
228 if (!page_has_buffers(page))
229 goto out_unlock;
230 head = page_buffers(page);
231 bh = head;
232 do {
233 if (!buffer_mapped(bh))
234 all_mapped = 0;
235 else if (bh->b_blocknr == block) {
236 ret = bh;
237 get_bh(bh);
238 goto out_unlock;
239 }
240 bh = bh->b_this_page;
241 } while (bh != head);
	/*
	 * The block was not found.  Unmapped buffers can legitimately occur
	 * here due to races between I/O on the block device and getblk();
	 * those cases are dealt with elsewhere, so stay quiet.  If every
	 * buffer was mapped, though, something is inconsistent - dump some
	 * state for debugging.
	 */
248 if (all_mapped) {
249 char b[BDEVNAME_SIZE];
250
251 printk("__find_get_block_slow() failed. "
252 "block=%llu, b_blocknr=%llu\n",
253 (unsigned long long)block,
254 (unsigned long long)bh->b_blocknr);
255 printk("b_state=0x%08lx, b_size=%zu\n",
256 bh->b_state, bh->b_size);
257 printk("device %s blocksize: %d\n", bdevname(bdev, b),
258 1 << bd_inode->i_blkbits);
259 }
260out_unlock:
261 spin_unlock(&bd_mapping->private_lock);
262 page_cache_release(page);
263out:
264 return ret;
265}
266
267
268
269
270static void free_more_memory(void)
271{
272 struct zone *zone;
273 int nid;
274
275 wakeup_flusher_threads(1024, WB_REASON_FREE_MORE_MEM);
276 yield();
277
278 for_each_online_node(nid) {
279 (void)first_zones_zonelist(node_zonelist(nid, GFP_NOFS),
280 gfp_zone(GFP_NOFS), NULL,
281 &zone);
282 if (zone)
283 try_to_free_pages(node_zonelist(nid, GFP_NOFS), 0,
284 GFP_NOFS, NULL);
285 }
286}
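/*
 * I/O completion handler for block_read_full_page() - pages which come
 * unlocked at the end of a read.  The async_read flag is cleared under
 * the first buffer's BH_Uptodate_Lock bit spinlock, with interrupts
 * disabled because this runs from bio completion.  Once the last
 * async_read buffer on the page completes, the page is marked uptodate
 * (if all its buffers are) and unlocked.
 */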
292static void end_buffer_async_read(struct buffer_head *bh, int uptodate)
293{
294 unsigned long flags;
295 struct buffer_head *first;
296 struct buffer_head *tmp;
297 struct page *page;
298 int page_uptodate = 1;
299
300 BUG_ON(!buffer_async_read(bh));
301
302 page = bh->b_page;
303 if (uptodate) {
304 set_buffer_uptodate(bh);
305 } else {
306 clear_buffer_uptodate(bh);
307 if (!quiet_error(bh))
308 buffer_io_error(bh);
309 SetPageError(page);
310 }
311
312
313
314
315
316
317 first = page_buffers(page);
318 local_irq_save(flags);
319 bit_spin_lock(BH_Uptodate_Lock, &first->b_state);
320 clear_buffer_async_read(bh);
321 unlock_buffer(bh);
322 tmp = bh;
323 do {
324 if (!buffer_uptodate(tmp))
325 page_uptodate = 0;
326 if (buffer_async_read(tmp)) {
327 BUG_ON(!buffer_locked(tmp));
328 goto still_busy;
329 }
330 tmp = tmp->b_this_page;
331 } while (tmp != bh);
332 bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
333 local_irq_restore(flags);
334
335
336
337
338
339 if (page_uptodate && !PageError(page))
340 SetPageUptodate(page);
341 unlock_page(page);
342 return;
343
344still_busy:
345 bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
346 local_irq_restore(flags);
347 return;
348}
349
350
351
352
353
354void end_buffer_async_write(struct buffer_head *bh, int uptodate)
355{
356 char b[BDEVNAME_SIZE];
357 unsigned long flags;
358 struct buffer_head *first;
359 struct buffer_head *tmp;
360 struct page *page;
361
362 BUG_ON(!buffer_async_write(bh));
363
364 page = bh->b_page;
365 if (uptodate) {
366 set_buffer_uptodate(bh);
367 } else {
368 if (!quiet_error(bh)) {
369 buffer_io_error(bh);
370 printk(KERN_WARNING "lost page write due to "
371 "I/O error on %s\n",
372 bdevname(bh->b_bdev, b));
373 }
374 set_bit(AS_EIO, &page->mapping->flags);
375 set_buffer_write_io_error(bh);
376 clear_buffer_uptodate(bh);
377 SetPageError(page);
378 }
379
380 first = page_buffers(page);
381 local_irq_save(flags);
382 bit_spin_lock(BH_Uptodate_Lock, &first->b_state);
383
384 clear_buffer_async_write(bh);
385 unlock_buffer(bh);
386 tmp = bh->b_this_page;
387 while (tmp != bh) {
388 if (buffer_async_write(tmp)) {
389 BUG_ON(!buffer_locked(tmp));
390 goto still_busy;
391 }
392 tmp = tmp->b_this_page;
393 }
394 bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
395 local_irq_restore(flags);
396 end_page_writeback(page);
397 return;
398
399still_busy:
400 bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
401 local_irq_restore(flags);
402 return;
403}
404EXPORT_SYMBOL(end_buffer_async_write);
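/*
 * Mark a buffer for asynchronous page I/O: set the end_io handler and the
 * async flag which the completion handlers above use to tell when the
 * last buffer on the page has finished, at which point the page is
 * unlocked (for reads) or its writeback is ended (for writes).  The
 * buffer must already be locked by the caller.
 */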
427static void mark_buffer_async_read(struct buffer_head *bh)
428{
429 bh->b_end_io = end_buffer_async_read;
430 set_buffer_async_read(bh);
431}
432
433static void mark_buffer_async_write_endio(struct buffer_head *bh,
434 bh_end_io_t *handler)
435{
436 bh->b_end_io = handler;
437 set_buffer_async_write(bh);
438}
439
440void mark_buffer_async_write(struct buffer_head *bh)
441{
442 mark_buffer_async_write_endio(bh, end_buffer_async_write);
443}
444EXPORT_SYMBOL(mark_buffer_async_write);
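/*
 * Support for fsync() of metadata buffers.
 *
 * Filesystems which want a plain fsync() to also write out associated
 * metadata (indirect blocks, for instance) attach those buffers to the
 * data inode's mapping->private_list with mark_buffer_dirty_inode().
 * Each listed buffer's b_assoc_map points back at the mapping whose list
 * it sits on, the mapping's private_data points at the buffer's backing
 * address_space (normally the block device's mapping), and the list is
 * protected by that backing mapping's private_lock.  A buffer can be on
 * at most one such list at a time.
 *
 * __remove_assoc_queue() must be called with that private_lock held.
 */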
499static void __remove_assoc_queue(struct buffer_head *bh)
500{
501 list_del_init(&bh->b_assoc_buffers);
502 WARN_ON(!bh->b_assoc_map);
503 if (buffer_write_io_error(bh))
504 set_bit(AS_EIO, &bh->b_assoc_map->flags);
505 bh->b_assoc_map = NULL;
506}
507
508int inode_has_buffers(struct inode *inode)
509{
510 return !list_empty(&inode->i_data.private_list);
511}
512
513
514
515
516
517
518
519
520
521
522
523static int osync_buffers_list(spinlock_t *lock, struct list_head *list)
524{
525 struct buffer_head *bh;
526 struct list_head *p;
527 int err = 0;
528
529 spin_lock(lock);
530repeat:
531 list_for_each_prev(p, list) {
532 bh = BH_ENTRY(p);
533 if (buffer_locked(bh)) {
534 get_bh(bh);
535 spin_unlock(lock);
536 wait_on_buffer(bh);
537 if (!buffer_uptodate(bh))
538 err = -EIO;
539 brelse(bh);
540 spin_lock(lock);
541 goto repeat;
542 }
543 }
544 spin_unlock(lock);
545 return err;
546}
547
548static void do_thaw_one(struct super_block *sb, void *unused)
549{
550 char b[BDEVNAME_SIZE];
551 while (sb->s_bdev && !thaw_bdev(sb->s_bdev, sb))
552 printk(KERN_WARNING "Emergency Thaw on %s\n",
553 bdevname(sb->s_bdev, b));
554}
555
556static void do_thaw_all(struct work_struct *work)
557{
558 iterate_supers(do_thaw_one, NULL);
559 kfree(work);
560 printk(KERN_WARNING "Emergency Thaw complete\n");
561}
562
563
564
565
566
567
568void emergency_thaw_all(void)
569{
570 struct work_struct *work;
571
572 work = kmalloc(sizeof(*work), GFP_ATOMIC);
573 if (work) {
574 INIT_WORK(work, do_thaw_all);
575 schedule_work(work);
576 }
577}
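/**
 * sync_mapping_buffers - write out & wait upon a mapping's "associated" buffers
 * @mapping: the mapping which wants those buffers written
 *
 * Starts I/O against the buffers at mapping->private_list, and waits upon
 * that I/O.  Basically a convenience helper for fsync(): @mapping is a
 * file or directory which needs its associated metadata buffers written
 * out for a successful fsync().
 */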
590int sync_mapping_buffers(struct address_space *mapping)
591{
592 struct address_space *buffer_mapping = mapping->private_data;
593
594 if (buffer_mapping == NULL || list_empty(&mapping->private_list))
595 return 0;
596
597 return fsync_buffers_list(&buffer_mapping->private_lock,
598 &mapping->private_list);
599}
600EXPORT_SYMBOL(sync_mapping_buffers);
601
602
603
604
605
606
607
608void write_boundary_block(struct block_device *bdev,
609 sector_t bblock, unsigned blocksize)
610{
611 struct buffer_head *bh = __find_get_block(bdev, bblock + 1, blocksize);
612 if (bh) {
613 if (buffer_dirty(bh))
614 ll_rw_block(WRITE, 1, &bh);
615 put_bh(bh);
616 }
617}
618
619void mark_buffer_dirty_inode(struct buffer_head *bh, struct inode *inode)
620{
621 struct address_space *mapping = inode->i_mapping;
622 struct address_space *buffer_mapping = bh->b_page->mapping;
623
624 mark_buffer_dirty(bh);
625 if (!mapping->private_data) {
626 mapping->private_data = buffer_mapping;
627 } else {
628 BUG_ON(mapping->private_data != buffer_mapping);
629 }
630 if (!bh->b_assoc_map) {
631 spin_lock(&buffer_mapping->private_lock);
632 list_move_tail(&bh->b_assoc_buffers,
633 &mapping->private_list);
634 bh->b_assoc_map = mapping;
635 spin_unlock(&buffer_mapping->private_lock);
636 }
637}
638EXPORT_SYMBOL(mark_buffer_dirty_inode);
639
640
641
642
643
644
645
646
647static void __set_page_dirty(struct page *page,
648 struct address_space *mapping, int warn)
649{
650 unsigned long flags;
651
652 spin_lock_irqsave(&mapping->tree_lock, flags);
653 if (page->mapping) {
654 WARN_ON_ONCE(warn && !PageUptodate(page));
655 account_page_dirtied(page, mapping);
656 radix_tree_tag_set(&mapping->page_tree,
657 page_index(page), PAGECACHE_TAG_DIRTY);
658 }
659 spin_unlock_irqrestore(&mapping->tree_lock, flags);
660 __mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
661}
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688int __set_page_dirty_buffers(struct page *page)
689{
690 int newly_dirty;
691 struct address_space *mapping = page_mapping(page);
692
693 if (unlikely(!mapping))
694 return !TestSetPageDirty(page);
695
696 spin_lock(&mapping->private_lock);
697 if (page_has_buffers(page)) {
698 struct buffer_head *head = page_buffers(page);
699 struct buffer_head *bh = head;
700
701 do {
702 set_buffer_dirty(bh);
703 bh = bh->b_this_page;
704 } while (bh != head);
705 }
706 newly_dirty = !TestSetPageDirty(page);
707 spin_unlock(&mapping->private_lock);
708
709 if (newly_dirty)
710 __set_page_dirty(page, mapping, 1);
711 return newly_dirty;
712}
713EXPORT_SYMBOL(__set_page_dirty_buffers);
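/*
 * Write out and wait upon a mapping's "associated" buffers.
 *
 * Buffers are taken off the private list one at a time.  Dirty or
 * in-flight buffers are parked on a temporary list; the dirty ones are
 * submitted with write_dirty_buffer(WRITE_SYNC) under a block plug, and
 * everything on the temporary list is waited upon once the plug has been
 * flushed.  A buffer that is dirty again by the time we wait on it is
 * put back on the inode's list so a later fsync can catch it.  Finally
 * osync_buffers_list() waits for buffers that were written by somebody
 * else while all of this was going on.
 */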
734static int fsync_buffers_list(spinlock_t *lock, struct list_head *list)
735{
736 struct buffer_head *bh;
737 struct list_head tmp;
738 struct address_space *mapping;
739 int err = 0, err2;
740 struct blk_plug plug;
741
742 INIT_LIST_HEAD(&tmp);
743 blk_start_plug(&plug);
744
745 spin_lock(lock);
746 while (!list_empty(list)) {
747 bh = BH_ENTRY(list->next);
748 mapping = bh->b_assoc_map;
749 __remove_assoc_queue(bh);
750
751
752 smp_mb();
753 if (buffer_dirty(bh) || buffer_locked(bh)) {
754 list_add(&bh->b_assoc_buffers, &tmp);
755 bh->b_assoc_map = mapping;
756 if (buffer_dirty(bh)) {
757 get_bh(bh);
758 spin_unlock(lock);
759
760
761
762
763
764
765
766 write_dirty_buffer(bh, WRITE_SYNC);
767
768
769
770
771
772
773
774 brelse(bh);
775 spin_lock(lock);
776 }
777 }
778 }
779
780 spin_unlock(lock);
781 blk_finish_plug(&plug);
782 spin_lock(lock);
783
784 while (!list_empty(&tmp)) {
785 bh = BH_ENTRY(tmp.prev);
786 get_bh(bh);
787 mapping = bh->b_assoc_map;
788 __remove_assoc_queue(bh);
789
790
791 smp_mb();
792 if (buffer_dirty(bh)) {
793 list_add(&bh->b_assoc_buffers,
794 &mapping->private_list);
795 bh->b_assoc_map = mapping;
796 }
797 spin_unlock(lock);
798 wait_on_buffer(bh);
799 if (!buffer_uptodate(bh))
800 err = -EIO;
801 brelse(bh);
802 spin_lock(lock);
803 }
804
805 spin_unlock(lock);
806 err2 = osync_buffers_list(lock, list);
807 if (err)
808 return err;
809 else
810 return err2;
811}
812
813
814
815
816
817
818
819
820
821
822void invalidate_inode_buffers(struct inode *inode)
823{
824 if (inode_has_buffers(inode)) {
825 struct address_space *mapping = &inode->i_data;
826 struct list_head *list = &mapping->private_list;
827 struct address_space *buffer_mapping = mapping->private_data;
828
829 spin_lock(&buffer_mapping->private_lock);
830 while (!list_empty(list))
831 __remove_assoc_queue(BH_ENTRY(list->next));
832 spin_unlock(&buffer_mapping->private_lock);
833 }
834}
835EXPORT_SYMBOL(invalidate_inode_buffers);
836
837
838
839
840
841
842
843int remove_inode_buffers(struct inode *inode)
844{
845 int ret = 1;
846
847 if (inode_has_buffers(inode)) {
848 struct address_space *mapping = &inode->i_data;
849 struct list_head *list = &mapping->private_list;
850 struct address_space *buffer_mapping = mapping->private_data;
851
852 spin_lock(&buffer_mapping->private_lock);
853 while (!list_empty(list)) {
854 struct buffer_head *bh = BH_ENTRY(list->next);
855 if (buffer_dirty(bh)) {
856 ret = 0;
857 break;
858 }
859 __remove_assoc_queue(bh);
860 }
861 spin_unlock(&buffer_mapping->private_lock);
862 }
863 return ret;
864}
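/*
 * Create the buffer_heads needed to map a page with blocks of @size
 * bytes, linked through bh->b_this_page (the ring is closed later by
 * link_dev_buffers() or create_empty_buffers()).  Returns NULL on
 * allocation failure unless @retry is set, in which case we keep trying
 * after asking the VM to free some memory - callers on writeout paths
 * must not fail.
 */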
875struct buffer_head *alloc_page_buffers(struct page *page, unsigned long size,
876 int retry)
877{
878 struct buffer_head *bh, *head;
879 long offset;
880
881try_again:
882 head = NULL;
883 offset = PAGE_SIZE;
884 while ((offset -= size) >= 0) {
885 bh = alloc_buffer_head(GFP_NOFS);
886 if (!bh)
887 goto no_grow;
888
889 bh->b_this_page = head;
890 bh->b_blocknr = -1;
891 head = bh;
892
893 bh->b_size = size;
894
895
896 set_bh_page(bh, page, offset);
897 }
898 return head;
899
900
901
902no_grow:
903 if (head) {
904 do {
905 bh = head;
906 head = head->b_this_page;
907 free_buffer_head(bh);
908 } while (head);
909 }
910
911
912
913
914
915
916
917 if (!retry)
918 return NULL;
919
920
921
922
923
924
925
926 free_more_memory();
927 goto try_again;
928}
929EXPORT_SYMBOL_GPL(alloc_page_buffers);
930
931static inline void
932link_dev_buffers(struct page *page, struct buffer_head *head)
933{
934 struct buffer_head *bh, *tail;
935
936 bh = head;
937 do {
938 tail = bh;
939 bh = bh->b_this_page;
940 } while (bh);
941 tail->b_this_page = head;
942 attach_page_buffers(page, head);
943}
944
945static sector_t blkdev_max_block(struct block_device *bdev, unsigned int size)
946{
947 sector_t retval = ~((sector_t)0);
948 loff_t sz = i_size_read(bdev->bd_inode);
949
950 if (sz) {
951 unsigned int sizebits = blksize_bits(size);
952 retval = (sz >> sizebits);
953 }
954 return retval;
955}
956
957
958
959
960static sector_t
961init_page_buffers(struct page *page, struct block_device *bdev,
962 sector_t block, int size)
963{
964 struct buffer_head *head = page_buffers(page);
965 struct buffer_head *bh = head;
966 int uptodate = PageUptodate(page);
967 sector_t end_block = blkdev_max_block(I_BDEV(bdev->bd_inode), size);
968
969 do {
970 if (!buffer_mapped(bh)) {
971 init_buffer(bh, NULL, NULL);
972 bh->b_bdev = bdev;
973 bh->b_blocknr = block;
974 if (uptodate)
975 set_buffer_uptodate(bh);
976 if (block < end_block)
977 set_buffer_mapped(bh);
978 }
979 block++;
980 bh = bh->b_this_page;
981 } while (bh != head);
982
983
984
985
986 return end_block;
987}
988
989
990
991
992
993
994static int
995grow_dev_page(struct block_device *bdev, sector_t block,
996 pgoff_t index, int size, int sizebits)
997{
998 struct inode *inode = bdev->bd_inode;
999 struct page *page;
1000 struct buffer_head *bh;
1001 sector_t end_block;
1002 int ret = 0;
1003 gfp_t gfp_mask;
1004
1005 gfp_mask = mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS;
1006 gfp_mask |= __GFP_MOVABLE;
1007
1008
1009
1010
1011
1012
1013 gfp_mask |= __GFP_NOFAIL;
1014
1015 page = find_or_create_page(inode->i_mapping, index, gfp_mask);
1016 if (!page)
1017 return ret;
1018
1019 BUG_ON(!PageLocked(page));
1020
1021 if (page_has_buffers(page)) {
1022 bh = page_buffers(page);
1023 if (bh->b_size == size) {
1024 end_block = init_page_buffers(page, bdev,
1025 (sector_t)index << sizebits,
1026 size);
1027 goto done;
1028 }
1029 if (!try_to_free_buffers(page))
1030 goto failed;
1031 }
1032
1033
1034
1035
1036 bh = alloc_page_buffers(page, size, 0);
1037 if (!bh)
1038 goto failed;
1039
1040
1041
1042
1043
1044
1045 spin_lock(&inode->i_mapping->private_lock);
1046 link_dev_buffers(page, bh);
1047 end_block = init_page_buffers(page, bdev, (sector_t)index << sizebits,
1048 size);
1049 spin_unlock(&inode->i_mapping->private_lock);
1050done:
1051 ret = (block < end_block) ? 1 : -ENXIO;
1052failed:
1053 unlock_page(page);
1054 page_cache_release(page);
1055 return ret;
1056}
1057
1058
1059
1060
1061
1062static int
1063grow_buffers(struct block_device *bdev, sector_t block, int size)
1064{
1065 pgoff_t index;
1066 int sizebits;
1067
1068 sizebits = -1;
1069 do {
1070 sizebits++;
1071 } while ((size << sizebits) < PAGE_SIZE);
1072
1073 index = block >> sizebits;
1074
1075
1076
1077
1078
1079 if (unlikely(index != block >> sizebits)) {
1080 char b[BDEVNAME_SIZE];
1081
1082 printk(KERN_ERR "%s: requested out-of-range block %llu for "
1083 "device %s\n",
1084 __func__, (unsigned long long)block,
1085 bdevname(bdev, b));
1086 return -EIO;
1087 }
1088
1089
1090 return grow_dev_page(bdev, block, index, size, sizebits);
1091}
1092
1093static struct buffer_head *
1094__getblk_slow(struct block_device *bdev, sector_t block, int size)
1095{
1096
1097 if (unlikely(size & (bdev_logical_block_size(bdev)-1) ||
1098 (size < 512 || size > PAGE_SIZE))) {
1099 printk(KERN_ERR "getblk(): invalid block size %d requested\n",
1100 size);
1101 printk(KERN_ERR "logical block size: %d\n",
1102 bdev_logical_block_size(bdev));
1103
1104 dump_stack();
1105 return NULL;
1106 }
1107
1108 for (;;) {
1109 struct buffer_head *bh;
1110 int ret;
1111
1112 bh = __find_get_block(bdev, block, size);
1113 if (bh)
1114 return bh;
1115
1116 ret = grow_buffers(bdev, block, size);
1117 if (ret < 0)
1118 return NULL;
1119 if (ret == 0)
1120 free_more_memory();
1121 }
1122}
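/**
 * mark_buffer_dirty - mark a buffer_head as needing writeout
 * @bh: the buffer_head to mark dirty
 *
 * Sets the dirty bit on the buffer, then dirties its backing page, tags
 * the page dirty in the mapping's radix tree, and finally puts the inode
 * on its superblock's dirty list via __mark_inode_dirty().
 *
 * The already-dirty case is handled locklessly; see the comment in the
 * body about the memory barrier that makes the early return safe.
 */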
1159void mark_buffer_dirty(struct buffer_head *bh)
1160{
1161 WARN_ON_ONCE(!buffer_uptodate(bh));
1162
1163 trace_block_dirty_buffer(bh);
	/*
	 * Very *carefully* optimize the it-is-already-dirty case.
	 *
	 * Don't let the final "is it dirty" escape to before we
	 * perhaps modified the buffer.
	 */
1171 if (buffer_dirty(bh)) {
1172 smp_mb();
1173 if (buffer_dirty(bh))
1174 return;
1175 }
1176
1177 if (!test_set_buffer_dirty(bh)) {
1178 struct page *page = bh->b_page;
1179 if (!TestSetPageDirty(page)) {
1180 struct address_space *mapping = page_mapping(page);
1181 if (mapping)
1182 __set_page_dirty(page, mapping, 0);
1183 }
1184 }
1185}
1186EXPORT_SYMBOL(mark_buffer_dirty);
1187
1188
1189
1190
1191
1192
1193
1194
1195void __brelse(struct buffer_head * buf)
1196{
1197 if (atomic_read(&buf->b_count)) {
1198 put_bh(buf);
1199 return;
1200 }
1201 WARN(1, KERN_ERR "VFS: brelse: Trying to free free buffer\n");
1202}
1203EXPORT_SYMBOL(__brelse);
1204
1205
1206
1207
1208
1209void __bforget(struct buffer_head *bh)
1210{
1211 clear_buffer_dirty(bh);
1212 if (bh->b_assoc_map) {
1213 struct address_space *buffer_mapping = bh->b_page->mapping;
1214
1215 spin_lock(&buffer_mapping->private_lock);
1216 list_del_init(&bh->b_assoc_buffers);
1217 bh->b_assoc_map = NULL;
1218 spin_unlock(&buffer_mapping->private_lock);
1219 }
1220 __brelse(bh);
1221}
1222EXPORT_SYMBOL(__bforget);
1223
1224static struct buffer_head *__bread_slow(struct buffer_head *bh)
1225{
1226 lock_buffer(bh);
1227 if (buffer_uptodate(bh)) {
1228 unlock_buffer(bh);
1229 return bh;
1230 } else {
1231 get_bh(bh);
1232 bh->b_end_io = end_buffer_read_sync;
1233 submit_bh(READ, bh);
1234 wait_on_buffer(bh);
1235 if (buffer_uptodate(bh))
1236 return bh;
1237 }
1238 brelse(bh);
1239 return NULL;
1240}
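/*
 * Per-cpu buffer_head LRU, used to make repeated __find_get_block()
 * lookups cheap.  bhs[] is kept most-recently-used first; a buffer holds
 * an extra reference while it sits in an LRU, appears at most once in a
 * given CPU's LRU, but may be present in several CPUs' LRUs at the same
 * time.  The LRUs only need protection against invalidate_bh_lrus(),
 * which is provided by disabling interrupts (preemption on !SMP).
 */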
1256#define BH_LRU_SIZE 8
1257
1258struct bh_lru {
1259 struct buffer_head *bhs[BH_LRU_SIZE];
1260};
1261
1262static DEFINE_PER_CPU(struct bh_lru, bh_lrus) = {{ NULL }};
1263
1264#ifdef CONFIG_SMP
1265#define bh_lru_lock() local_irq_disable()
1266#define bh_lru_unlock() local_irq_enable()
1267#else
1268#define bh_lru_lock() preempt_disable()
1269#define bh_lru_unlock() preempt_enable()
1270#endif
1271
1272static inline void check_irqs_on(void)
1273{
1274#ifdef irqs_disabled
1275 BUG_ON(irqs_disabled());
1276#endif
1277}
1278
1279
1280
1281
1282static void bh_lru_install(struct buffer_head *bh)
1283{
1284 struct buffer_head *evictee = NULL;
1285
1286 check_irqs_on();
1287 bh_lru_lock();
1288 if (__this_cpu_read(bh_lrus.bhs[0]) != bh) {
1289 struct buffer_head *bhs[BH_LRU_SIZE];
1290 int in;
1291 int out = 0;
1292
1293 get_bh(bh);
1294 bhs[out++] = bh;
1295 for (in = 0; in < BH_LRU_SIZE; in++) {
1296 struct buffer_head *bh2 =
1297 __this_cpu_read(bh_lrus.bhs[in]);
1298
1299 if (bh2 == bh) {
1300 __brelse(bh2);
1301 } else {
1302 if (out >= BH_LRU_SIZE) {
1303 BUG_ON(evictee != NULL);
1304 evictee = bh2;
1305 } else {
1306 bhs[out++] = bh2;
1307 }
1308 }
1309 }
1310 while (out < BH_LRU_SIZE)
1311 bhs[out++] = NULL;
1312 memcpy(this_cpu_ptr(&bh_lrus.bhs), bhs, sizeof(bhs));
1313 }
1314 bh_lru_unlock();
1315
1316 if (evictee)
1317 __brelse(evictee);
1318}
1319
1320
1321
1322
1323static struct buffer_head *
1324lookup_bh_lru(struct block_device *bdev, sector_t block, unsigned size)
1325{
1326 struct buffer_head *ret = NULL;
1327 unsigned int i;
1328
1329 check_irqs_on();
1330 bh_lru_lock();
1331 for (i = 0; i < BH_LRU_SIZE; i++) {
1332 struct buffer_head *bh = __this_cpu_read(bh_lrus.bhs[i]);
1333
1334 if (bh && bh->b_bdev == bdev &&
1335 bh->b_blocknr == block && bh->b_size == size) {
1336 if (i) {
1337 while (i) {
1338 __this_cpu_write(bh_lrus.bhs[i],
1339 __this_cpu_read(bh_lrus.bhs[i - 1]));
1340 i--;
1341 }
1342 __this_cpu_write(bh_lrus.bhs[0], bh);
1343 }
1344 get_bh(bh);
1345 ret = bh;
1346 break;
1347 }
1348 }
1349 bh_lru_unlock();
1350 return ret;
1351}
1352
1353
1354
1355
1356
1357
1358struct buffer_head *
1359__find_get_block(struct block_device *bdev, sector_t block, unsigned size)
1360{
1361 struct buffer_head *bh = lookup_bh_lru(bdev, block, size);
1362
1363 if (bh == NULL) {
1364
1365 bh = __find_get_block_slow(bdev, block);
1366 if (bh)
1367 bh_lru_install(bh);
1368 } else
1369 touch_buffer(bh);
1370
1371 return bh;
1372}
1373EXPORT_SYMBOL(__find_get_block);
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383struct buffer_head *
1384__getblk(struct block_device *bdev, sector_t block, unsigned size)
1385{
1386 struct buffer_head *bh = __find_get_block(bdev, block, size);
1387
1388 might_sleep();
1389 if (bh == NULL)
1390 bh = __getblk_slow(bdev, block, size);
1391 return bh;
1392}
1393EXPORT_SYMBOL(__getblk);
1394
1395
1396
1397
1398void __breadahead(struct block_device *bdev, sector_t block, unsigned size)
1399{
1400 struct buffer_head *bh = __getblk(bdev, block, size);
1401 if (likely(bh)) {
1402 ll_rw_block(READA, 1, &bh);
1403 brelse(bh);
1404 }
1405}
1406EXPORT_SYMBOL(__breadahead);
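/**
 * __bread() - read a block and return the buffer_head that contains it
 * @bdev: the block_device to read from
 * @block: number of the block
 * @size: size (in bytes) to read
 *
 * Reads the specified block and returns a buffer_head containing it, or
 * NULL if the block could not be read.  The caller receives a reference
 * and must drop it with brelse() when done.
 *
 * A minimal usage sketch (illustrative only - the block number, size and
 * do_something_with() are made up):
 *
 *	struct buffer_head *bh = __bread(bdev, 42, 512);
 *	if (bh) {
 *		do_something_with(bh->b_data, bh->b_size);
 *		brelse(bh);
 *	}
 */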
1417struct buffer_head *
1418__bread(struct block_device *bdev, sector_t block, unsigned size)
1419{
1420 struct buffer_head *bh = __getblk(bdev, block, size);
1421
1422 if (likely(bh) && !buffer_uptodate(bh))
1423 bh = __bread_slow(bh);
1424 return bh;
1425}
1426EXPORT_SYMBOL(__bread);
1427
1428
1429
1430
1431
1432
1433static void invalidate_bh_lru(void *arg)
1434{
1435 struct bh_lru *b = &get_cpu_var(bh_lrus);
1436 int i;
1437
1438 for (i = 0; i < BH_LRU_SIZE; i++) {
1439 brelse(b->bhs[i]);
1440 b->bhs[i] = NULL;
1441 }
1442 put_cpu_var(bh_lrus);
1443}
1444
1445static bool has_bh_in_lru(int cpu, void *dummy)
1446{
1447 struct bh_lru *b = per_cpu_ptr(&bh_lrus, cpu);
1448 int i;
1449
1450 for (i = 0; i < BH_LRU_SIZE; i++) {
1451 if (b->bhs[i])
1452 return 1;
1453 }
1454
1455 return 0;
1456}
1457
1458void invalidate_bh_lrus(void)
1459{
1460 on_each_cpu_cond(has_bh_in_lru, invalidate_bh_lru, NULL, 1, GFP_KERNEL);
1461}
1462EXPORT_SYMBOL_GPL(invalidate_bh_lrus);
1463
1464void set_bh_page(struct buffer_head *bh,
1465 struct page *page, unsigned long offset)
1466{
1467 bh->b_page = page;
1468 BUG_ON(offset >= PAGE_SIZE);
1469 if (PageHighMem(page))
1470
1471
1472
1473 bh->b_data = (char *)(0 + offset);
1474 else
1475 bh->b_data = page_address(page) + offset;
1476}
1477EXPORT_SYMBOL(set_bh_page);
1478
1479
1480
1481
1482
1483
1484#define BUFFER_FLAGS_DISCARD \
1485 (1 << BH_Mapped | 1 << BH_New | 1 << BH_Req | \
1486 1 << BH_Delay | 1 << BH_Unwritten)
1487
1488static void discard_buffer(struct buffer_head * bh)
1489{
1490 unsigned long b_state, b_state_old;
1491
1492 lock_buffer(bh);
1493 clear_buffer_dirty(bh);
1494 bh->b_bdev = NULL;
1495 b_state = bh->b_state;
1496 for (;;) {
1497 b_state_old = cmpxchg(&bh->b_state, b_state,
1498 (b_state & ~BUFFER_FLAGS_DISCARD));
1499 if (b_state_old == b_state)
1500 break;
1501 b_state = b_state_old;
1502 }
1503 unlock_buffer(bh);
1504}
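/**
 * block_invalidatepage - invalidate part or all of a buffer-backed page
 * @page: the page which is affected
 * @offset: start of the range to invalidate
 * @length: length of the range to invalidate
 *
 * Called when all or part of the page has been invalidated by a truncate
 * operation.  Buffers that lie entirely inside the range are discarded
 * (their dirty and mapped state is cleared so they can never be written
 * back); when the whole page is invalidated we also try to release its
 * buffers altogether.
 */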
1522void block_invalidatepage(struct page *page, unsigned int offset,
1523 unsigned int length)
1524{
1525 struct buffer_head *head, *bh, *next;
1526 unsigned int curr_off = 0;
1527 unsigned int stop = length + offset;
1528
1529 BUG_ON(!PageLocked(page));
1530 if (!page_has_buffers(page))
1531 goto out;
1532
1533
1534
1535
1536 BUG_ON(stop > PAGE_CACHE_SIZE || stop < length);
1537
1538 head = page_buffers(page);
1539 bh = head;
1540 do {
1541 unsigned int next_off = curr_off + bh->b_size;
1542 next = bh->b_this_page;
1543
1544
1545
1546
1547 if (next_off > stop)
1548 goto out;
1549
1550
1551
1552
1553 if (offset <= curr_off)
1554 discard_buffer(bh);
1555 curr_off = next_off;
1556 bh = next;
1557 } while (bh != head);
1558
1559
1560
1561
1562
1563
1564 if (offset == 0)
1565 try_to_release_page(page, 0);
1566out:
1567 return;
1568}
1569EXPORT_SYMBOL(block_invalidatepage);
1570
1571
1572
1573
1574
1575
1576
1577void create_empty_buffers(struct page *page,
1578 unsigned long blocksize, unsigned long b_state)
1579{
1580 struct buffer_head *bh, *head, *tail;
1581
1582 head = alloc_page_buffers(page, blocksize, 1);
1583 bh = head;
1584 do {
1585 bh->b_state |= b_state;
1586 tail = bh;
1587 bh = bh->b_this_page;
1588 } while (bh);
1589 tail->b_this_page = head;
1590
1591 spin_lock(&page->mapping->private_lock);
1592 if (PageUptodate(page) || PageDirty(page)) {
1593 bh = head;
1594 do {
1595 if (PageDirty(page))
1596 set_buffer_dirty(bh);
1597 if (PageUptodate(page))
1598 set_buffer_uptodate(bh);
1599 bh = bh->b_this_page;
1600 } while (bh != head);
1601 }
1602 attach_page_buffers(page, head);
1603 spin_unlock(&page->mapping->private_lock);
1604}
1605EXPORT_SYMBOL(create_empty_buffers);
1606
1607
1608
1609
1610
1611
1612
1613
1614
1615
1616
1617
1618
1619
1620
1621
1622
1623void unmap_underlying_metadata(struct block_device *bdev, sector_t block)
1624{
1625 struct buffer_head *old_bh;
1626
1627 might_sleep();
1628
1629 old_bh = __find_get_block_slow(bdev, block);
1630 if (old_bh) {
1631 clear_buffer_dirty(old_bh);
1632 wait_on_buffer(old_bh);
1633 clear_buffer_req(old_bh);
1634 __brelse(old_bh);
1635 }
1636}
1637EXPORT_SYMBOL(unmap_underlying_metadata);
1638
1639
1640
1641
1642
1643
1644
1645
1646
1647static inline int block_size_bits(unsigned int blocksize)
1648{
1649 return ilog2(blocksize);
1650}
1651
1652static struct buffer_head *create_page_buffers(struct page *page, struct inode *inode, unsigned int b_state)
1653{
1654 BUG_ON(!PageLocked(page));
1655
1656 if (!page_has_buffers(page))
1657 create_empty_buffers(page, 1 << ACCESS_ONCE(inode->i_blkbits), b_state);
1658 return page_buffers(page);
1659}
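/*
 * NOTE! All mapped/uptodate combinations are valid:
 *
 *	Mapped	Uptodate	Meaning
 *	No	No		"unknown" - must do get_block()
 *	No	Yes		"hole" - zero-filled
 *	Yes	No		"allocated" - allocated on disk, not read in
 *	Yes	Yes		"valid" - allocated and up-to-date in memory
 *
 * "Dirty" is valid only with the last case (mapped+uptodate).
 *
 * Write out the page's dirty buffers: map dirty-but-unmapped buffers with
 * get_block(), mark them async_write, set the page writeback and submit
 * the I/O.  If get_block() fails we drop to the "recover" path, which
 * still writes out the mapped, non-delay dirty buffers so as much data as
 * possible reaches disk, and records the error on the page and mapping.
 */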
1690static int __block_write_full_page(struct inode *inode, struct page *page,
1691 get_block_t *get_block, struct writeback_control *wbc,
1692 bh_end_io_t *handler)
1693{
1694 int err;
1695 sector_t block;
1696 sector_t last_block;
1697 struct buffer_head *bh, *head;
1698 unsigned int blocksize, bbits;
1699 int nr_underway = 0;
1700 int write_op = (wbc->sync_mode == WB_SYNC_ALL ?
1701 WRITE_SYNC : WRITE);
1702
1703 head = create_page_buffers(page, inode,
1704 (1 << BH_Dirty)|(1 << BH_Uptodate));
1705
1706
1707
1708
1709
1710
1711
1712
1713
1714
1715
1716 bh = head;
1717 blocksize = bh->b_size;
1718 bbits = block_size_bits(blocksize);
1719
1720 block = (sector_t)page->index << (PAGE_CACHE_SHIFT - bbits);
1721 last_block = (i_size_read(inode) - 1) >> bbits;
1722
1723
1724
1725
1726
1727 do {
1728 if (block > last_block) {
1729
1730
1731
1732
1733
1734
1735
1736
1737 clear_buffer_dirty(bh);
1738 set_buffer_uptodate(bh);
1739 } else if ((!buffer_mapped(bh) || buffer_delay(bh)) &&
1740 buffer_dirty(bh)) {
1741 WARN_ON(bh->b_size != blocksize);
1742 err = get_block(inode, block, bh, 1);
1743 if (err)
1744 goto recover;
1745 clear_buffer_delay(bh);
1746 if (buffer_new(bh)) {
1747
1748 clear_buffer_new(bh);
1749 unmap_underlying_metadata(bh->b_bdev,
1750 bh->b_blocknr);
1751 }
1752 }
1753 bh = bh->b_this_page;
1754 block++;
1755 } while (bh != head);
1756
1757 do {
1758 if (!buffer_mapped(bh))
1759 continue;
1760
1761
1762
1763
1764
1765
1766
1767 if (wbc->sync_mode != WB_SYNC_NONE) {
1768 lock_buffer(bh);
1769 } else if (!trylock_buffer(bh)) {
1770 redirty_page_for_writepage(wbc, page);
1771 continue;
1772 }
1773 if (test_clear_buffer_dirty(bh)) {
1774 mark_buffer_async_write_endio(bh, handler);
1775 } else {
1776 unlock_buffer(bh);
1777 }
1778 } while ((bh = bh->b_this_page) != head);
1779
1780
1781
1782
1783
1784 BUG_ON(PageWriteback(page));
1785 set_page_writeback(page);
1786
1787 do {
1788 struct buffer_head *next = bh->b_this_page;
1789 if (buffer_async_write(bh)) {
1790 submit_bh(write_op, bh);
1791 nr_underway++;
1792 }
1793 bh = next;
1794 } while (bh != head);
1795 unlock_page(page);
1796
1797 err = 0;
1798done:
1799 if (nr_underway == 0) {
1800
1801
1802
1803
1804
1805 end_page_writeback(page);
1806
1807
1808
1809
1810
1811 }
1812 return err;
1813
1814recover:
1815
1816
1817
1818
1819
1820
1821 bh = head;
1822
1823 do {
1824 if (buffer_mapped(bh) && buffer_dirty(bh) &&
1825 !buffer_delay(bh)) {
1826 lock_buffer(bh);
1827 mark_buffer_async_write_endio(bh, handler);
1828 } else {
1829
1830
1831
1832
1833 clear_buffer_dirty(bh);
1834 }
1835 } while ((bh = bh->b_this_page) != head);
1836 SetPageError(page);
1837 BUG_ON(PageWriteback(page));
1838 mapping_set_error(page->mapping, err);
1839 set_page_writeback(page);
1840 do {
1841 struct buffer_head *next = bh->b_this_page;
1842 if (buffer_async_write(bh)) {
1843 clear_buffer_dirty(bh);
1844 submit_bh(write_op, bh);
1845 nr_underway++;
1846 }
1847 bh = next;
1848 } while (bh != head);
1849 unlock_page(page);
1850 goto done;
1851}
1852
1853
1854
1855
1856
1857
1858void page_zero_new_buffers(struct page *page, unsigned from, unsigned to)
1859{
1860 unsigned int block_start, block_end;
1861 struct buffer_head *head, *bh;
1862
1863 BUG_ON(!PageLocked(page));
1864 if (!page_has_buffers(page))
1865 return;
1866
1867 bh = head = page_buffers(page);
1868 block_start = 0;
1869 do {
1870 block_end = block_start + bh->b_size;
1871
1872 if (buffer_new(bh)) {
1873 if (block_end > from && block_start < to) {
1874 if (!PageUptodate(page)) {
1875 unsigned start, size;
1876
1877 start = max(from, block_start);
1878 size = min(to, block_end) - start;
1879
1880 zero_user(page, start, size);
1881 set_buffer_uptodate(bh);
1882 }
1883
1884 clear_buffer_new(bh);
1885 mark_buffer_dirty(bh);
1886 }
1887 }
1888
1889 block_start = block_end;
1890 bh = bh->b_this_page;
1891 } while (bh != head);
1892}
1893EXPORT_SYMBOL(page_zero_new_buffers);
1894
1895int __block_write_begin(struct page *page, loff_t pos, unsigned len,
1896 get_block_t *get_block)
1897{
1898 unsigned from = pos & (PAGE_CACHE_SIZE - 1);
1899 unsigned to = from + len;
1900 struct inode *inode = page->mapping->host;
1901 unsigned block_start, block_end;
1902 sector_t block;
1903 int err = 0;
1904 unsigned blocksize, bbits;
1905 struct buffer_head *bh, *head, *wait[2], **wait_bh=wait;
1906
1907 BUG_ON(!PageLocked(page));
1908 BUG_ON(from > PAGE_CACHE_SIZE);
1909 BUG_ON(to > PAGE_CACHE_SIZE);
1910 BUG_ON(from > to);
1911
1912 head = create_page_buffers(page, inode, 0);
1913 blocksize = head->b_size;
1914 bbits = block_size_bits(blocksize);
1915
1916 block = (sector_t)page->index << (PAGE_CACHE_SHIFT - bbits);
1917
1918 for(bh = head, block_start = 0; bh != head || !block_start;
1919 block++, block_start=block_end, bh = bh->b_this_page) {
1920 block_end = block_start + blocksize;
1921 if (block_end <= from || block_start >= to) {
1922 if (PageUptodate(page)) {
1923 if (!buffer_uptodate(bh))
1924 set_buffer_uptodate(bh);
1925 }
1926 continue;
1927 }
1928 if (buffer_new(bh))
1929 clear_buffer_new(bh);
1930 if (!buffer_mapped(bh)) {
1931 WARN_ON(bh->b_size != blocksize);
1932 err = get_block(inode, block, bh, 1);
1933 if (err)
1934 break;
1935 if (buffer_new(bh)) {
1936 unmap_underlying_metadata(bh->b_bdev,
1937 bh->b_blocknr);
1938 if (PageUptodate(page)) {
1939 clear_buffer_new(bh);
1940 set_buffer_uptodate(bh);
1941 mark_buffer_dirty(bh);
1942 continue;
1943 }
1944 if (block_end > to || block_start < from)
1945 zero_user_segments(page,
1946 to, block_end,
1947 block_start, from);
1948 continue;
1949 }
1950 }
1951 if (PageUptodate(page)) {
1952 if (!buffer_uptodate(bh))
1953 set_buffer_uptodate(bh);
1954 continue;
1955 }
1956 if (!buffer_uptodate(bh) && !buffer_delay(bh) &&
1957 !buffer_unwritten(bh) &&
1958 (block_start < from || block_end > to)) {
1959 ll_rw_block(READ, 1, &bh);
1960 *wait_bh++=bh;
1961 }
1962 }
1963
1964
1965
1966 while(wait_bh > wait) {
1967 wait_on_buffer(*--wait_bh);
1968 if (!buffer_uptodate(*wait_bh))
1969 err = -EIO;
1970 }
1971 if (unlikely(err))
1972 page_zero_new_buffers(page, from, to);
1973 return err;
1974}
1975EXPORT_SYMBOL(__block_write_begin);
1976
1977static int __block_commit_write(struct inode *inode, struct page *page,
1978 unsigned from, unsigned to)
1979{
1980 unsigned block_start, block_end;
1981 int partial = 0;
1982 unsigned blocksize;
1983 struct buffer_head *bh, *head;
1984
1985 bh = head = page_buffers(page);
1986 blocksize = bh->b_size;
1987
1988 block_start = 0;
1989 do {
1990 block_end = block_start + blocksize;
1991 if (block_end <= from || block_start >= to) {
1992 if (!buffer_uptodate(bh))
1993 partial = 1;
1994 } else {
1995 set_buffer_uptodate(bh);
1996 mark_buffer_dirty(bh);
1997 }
1998 clear_buffer_new(bh);
1999
2000 block_start = block_end;
2001 bh = bh->b_this_page;
2002 } while (bh != head);
2003
2004
2005
2006
2007
2008
2009
2010 if (!partial)
2011 SetPageUptodate(page);
2012 return 0;
2013}
2014
2015
2016
2017
2018
2019
2020
2021int block_write_begin(struct address_space *mapping, loff_t pos, unsigned len,
2022 unsigned flags, struct page **pagep, get_block_t *get_block)
2023{
2024 pgoff_t index = pos >> PAGE_CACHE_SHIFT;
2025 struct page *page;
2026 int status;
2027
2028 page = grab_cache_page_write_begin(mapping, index, flags);
2029 if (!page)
2030 return -ENOMEM;
2031
2032 status = __block_write_begin(page, pos, len, get_block);
2033 if (unlikely(status)) {
2034 unlock_page(page);
2035 page_cache_release(page);
2036 page = NULL;
2037 }
2038
2039 *pagep = page;
2040 return status;
2041}
2042EXPORT_SYMBOL(block_write_begin);
2043
2044int block_write_end(struct file *file, struct address_space *mapping,
2045 loff_t pos, unsigned len, unsigned copied,
2046 struct page *page, void *fsdata)
2047{
2048 struct inode *inode = mapping->host;
2049 unsigned start;
2050
2051 start = pos & (PAGE_CACHE_SIZE - 1);
2052
2053 if (unlikely(copied < len)) {
2054
2055
2056
2057
2058
2059
2060
2061
2062
2063
2064
2065
2066 if (!PageUptodate(page))
2067 copied = 0;
2068
2069 page_zero_new_buffers(page, start+copied, start+len);
2070 }
2071 flush_dcache_page(page);
2072
2073
2074 __block_commit_write(inode, page, start, start+copied);
2075
2076 return copied;
2077}
2078EXPORT_SYMBOL(block_write_end);
2079
2080int generic_write_end(struct file *file, struct address_space *mapping,
2081 loff_t pos, unsigned len, unsigned copied,
2082 struct page *page, void *fsdata)
2083{
2084 struct inode *inode = mapping->host;
2085 int i_size_changed = 0;
2086
2087 copied = block_write_end(file, mapping, pos, len, copied, page, fsdata);
2088
2089
2090
2091
2092
2093
2094
2095
2096 if (pos+copied > inode->i_size) {
2097 i_size_write(inode, pos+copied);
2098 i_size_changed = 1;
2099 }
2100
2101 unlock_page(page);
2102 page_cache_release(page);
2103
2104
2105
2106
2107
2108
2109
2110 if (i_size_changed)
2111 mark_inode_dirty(inode);
2112
2113 return copied;
2114}
2115EXPORT_SYMBOL(generic_write_end);
2116
2117
2118
2119
2120
2121
2122
2123
2124int block_is_partially_uptodate(struct page *page, unsigned long from,
2125 unsigned long count)
2126{
2127 unsigned block_start, block_end, blocksize;
2128 unsigned to;
2129 struct buffer_head *bh, *head;
2130 int ret = 1;
2131
2132 if (!page_has_buffers(page))
2133 return 0;
2134
2135 head = page_buffers(page);
2136 blocksize = head->b_size;
2137 to = min_t(unsigned, PAGE_CACHE_SIZE - from, count);
2138 to = from + to;
2139 if (from < blocksize && to > PAGE_CACHE_SIZE - blocksize)
2140 return 0;
2141
2142 bh = head;
2143 block_start = 0;
2144 do {
2145 block_end = block_start + blocksize;
2146 if (block_end > from && block_start < to) {
2147 if (!buffer_uptodate(bh)) {
2148 ret = 0;
2149 break;
2150 }
2151 if (block_end >= to)
2152 break;
2153 }
2154 block_start = block_end;
2155 bh = bh->b_this_page;
2156 } while (bh != head);
2157
2158 return ret;
2159}
2160EXPORT_SYMBOL(block_is_partially_uptodate);
2161
2162
2163
2164
2165
2166
2167
2168
2169int block_read_full_page(struct page *page, get_block_t *get_block)
2170{
2171 struct inode *inode = page->mapping->host;
2172 sector_t iblock, lblock;
2173 struct buffer_head *bh, *head, *arr[MAX_BUF_PER_PAGE];
2174 unsigned int blocksize, bbits;
2175 int nr, i;
2176 int fully_mapped = 1;
2177
2178 head = create_page_buffers(page, inode, 0);
2179 blocksize = head->b_size;
2180 bbits = block_size_bits(blocksize);
2181
2182 iblock = (sector_t)page->index << (PAGE_CACHE_SHIFT - bbits);
2183 lblock = (i_size_read(inode)+blocksize-1) >> bbits;
2184 bh = head;
2185 nr = 0;
2186 i = 0;
2187
2188 do {
2189 if (buffer_uptodate(bh))
2190 continue;
2191
2192 if (!buffer_mapped(bh)) {
2193 int err = 0;
2194
2195 fully_mapped = 0;
2196 if (iblock < lblock) {
2197 WARN_ON(bh->b_size != blocksize);
2198 err = get_block(inode, iblock, bh, 0);
2199 if (err)
2200 SetPageError(page);
2201 }
2202 if (!buffer_mapped(bh)) {
2203 zero_user(page, i * blocksize, blocksize);
2204 if (!err)
2205 set_buffer_uptodate(bh);
2206 continue;
2207 }
2208
2209
2210
2211
2212 if (buffer_uptodate(bh))
2213 continue;
2214 }
2215 arr[nr++] = bh;
2216 } while (i++, iblock++, (bh = bh->b_this_page) != head);
2217
2218 if (fully_mapped)
2219 SetPageMappedToDisk(page);
2220
2221 if (!nr) {
2222
2223
2224
2225
2226 if (!PageError(page))
2227 SetPageUptodate(page);
2228 unlock_page(page);
2229 return 0;
2230 }
2231
2232
2233 for (i = 0; i < nr; i++) {
2234 bh = arr[i];
2235 lock_buffer(bh);
2236 mark_buffer_async_read(bh);
2237 }
2238
2239
2240
2241
2242
2243
2244 for (i = 0; i < nr; i++) {
2245 bh = arr[i];
2246 if (buffer_uptodate(bh))
2247 end_buffer_async_read(bh, 1);
2248 else
2249 submit_bh(READ, bh);
2250 }
2251 return 0;
2252}
2253EXPORT_SYMBOL(block_read_full_page);
2254
2255
2256
2257
2258
2259int generic_cont_expand_simple(struct inode *inode, loff_t size)
2260{
2261 struct address_space *mapping = inode->i_mapping;
2262 struct page *page;
2263 void *fsdata;
2264 int err;
2265
2266 err = inode_newsize_ok(inode, size);
2267 if (err)
2268 goto out;
2269
2270 err = pagecache_write_begin(NULL, mapping, size, 0,
2271 AOP_FLAG_UNINTERRUPTIBLE|AOP_FLAG_CONT_EXPAND,
2272 &page, &fsdata);
2273 if (err)
2274 goto out;
2275
2276 err = pagecache_write_end(NULL, mapping, size, 0, 0, page, fsdata);
2277 BUG_ON(err > 0);
2278
2279out:
2280 return err;
2281}
2282EXPORT_SYMBOL(generic_cont_expand_simple);
2283
2284static int cont_expand_zero(struct file *file, struct address_space *mapping,
2285 loff_t pos, loff_t *bytes)
2286{
2287 struct inode *inode = mapping->host;
2288 unsigned blocksize = 1 << inode->i_blkbits;
2289 struct page *page;
2290 void *fsdata;
2291 pgoff_t index, curidx;
2292 loff_t curpos;
2293 unsigned zerofrom, offset, len;
2294 int err = 0;
2295
2296 index = pos >> PAGE_CACHE_SHIFT;
2297 offset = pos & ~PAGE_CACHE_MASK;
2298
2299 while (index > (curidx = (curpos = *bytes)>>PAGE_CACHE_SHIFT)) {
2300 zerofrom = curpos & ~PAGE_CACHE_MASK;
2301 if (zerofrom & (blocksize-1)) {
2302 *bytes |= (blocksize-1);
2303 (*bytes)++;
2304 }
2305 len = PAGE_CACHE_SIZE - zerofrom;
2306
2307 err = pagecache_write_begin(file, mapping, curpos, len,
2308 AOP_FLAG_UNINTERRUPTIBLE,
2309 &page, &fsdata);
2310 if (err)
2311 goto out;
2312 zero_user(page, zerofrom, len);
2313 err = pagecache_write_end(file, mapping, curpos, len, len,
2314 page, fsdata);
2315 if (err < 0)
2316 goto out;
2317 BUG_ON(err != len);
2318 err = 0;
2319
2320 balance_dirty_pages_ratelimited(mapping);
2321 }
2322
2323
2324 if (index == curidx) {
2325 zerofrom = curpos & ~PAGE_CACHE_MASK;
2326
2327 if (offset <= zerofrom) {
2328 goto out;
2329 }
2330 if (zerofrom & (blocksize-1)) {
2331 *bytes |= (blocksize-1);
2332 (*bytes)++;
2333 }
2334 len = offset - zerofrom;
2335
2336 err = pagecache_write_begin(file, mapping, curpos, len,
2337 AOP_FLAG_UNINTERRUPTIBLE,
2338 &page, &fsdata);
2339 if (err)
2340 goto out;
2341 zero_user(page, zerofrom, len);
2342 err = pagecache_write_end(file, mapping, curpos, len, len,
2343 page, fsdata);
2344 if (err < 0)
2345 goto out;
2346 BUG_ON(err != len);
2347 err = 0;
2348 }
2349out:
2350 return err;
2351}
2352
2353
2354
2355
2356
2357int cont_write_begin(struct file *file, struct address_space *mapping,
2358 loff_t pos, unsigned len, unsigned flags,
2359 struct page **pagep, void **fsdata,
2360 get_block_t *get_block, loff_t *bytes)
2361{
2362 struct inode *inode = mapping->host;
2363 unsigned blocksize = 1 << inode->i_blkbits;
2364 unsigned zerofrom;
2365 int err;
2366
2367 err = cont_expand_zero(file, mapping, pos, bytes);
2368 if (err)
2369 return err;
2370
2371 zerofrom = *bytes & ~PAGE_CACHE_MASK;
2372 if (pos+len > *bytes && zerofrom & (blocksize-1)) {
2373 *bytes |= (blocksize-1);
2374 (*bytes)++;
2375 }
2376
2377 return block_write_begin(mapping, pos, len, flags, pagep, get_block);
2378}
2379EXPORT_SYMBOL(cont_write_begin);
2380
2381int block_commit_write(struct page *page, unsigned from, unsigned to)
2382{
2383 struct inode *inode = page->mapping->host;
2384 __block_commit_write(inode,page,from,to);
2385 return 0;
2386}
2387EXPORT_SYMBOL(block_commit_write);
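/*
 * Helper for ->page_mkwrite: make a faulted page writable by allocating
 * any missing blocks with __block_write_begin() and dirtying its buffers.
 * The file size cannot be changed here, so EOF must be checked under the
 * page lock: a page that still lies within i_size is safe against
 * truncation until it is unlocked.  Direct callers must guard against
 * filesystem freezing with sb_start_pagefault()/sb_end_pagefault();
 * block_page_mkwrite() below does that for you.
 */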
2407int __block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
2408 get_block_t get_block)
2409{
2410 struct page *page = vmf->page;
2411 struct inode *inode = file_inode(vma->vm_file);
2412 unsigned long end;
2413 loff_t size;
2414 int ret;
2415
2416 lock_page(page);
2417 size = i_size_read(inode);
2418 if ((page->mapping != inode->i_mapping) ||
2419 (page_offset(page) > size)) {
2420
2421 ret = -EFAULT;
2422 goto out_unlock;
2423 }
2424
2425
2426 if (((page->index + 1) << PAGE_CACHE_SHIFT) > size)
2427 end = size & ~PAGE_CACHE_MASK;
2428 else
2429 end = PAGE_CACHE_SIZE;
2430
2431 ret = __block_write_begin(page, 0, end, get_block);
2432 if (!ret)
2433 ret = block_commit_write(page, 0, end);
2434
2435 if (unlikely(ret < 0))
2436 goto out_unlock;
2437 set_page_dirty(page);
2438 wait_for_stable_page(page);
2439 return 0;
2440out_unlock:
2441 unlock_page(page);
2442 return ret;
2443}
2444EXPORT_SYMBOL(__block_page_mkwrite);
2445
2446int block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
2447 get_block_t get_block)
2448{
2449 int ret;
2450 struct super_block *sb = file_inode(vma->vm_file)->i_sb;
2451
2452 sb_start_pagefault(sb);
2453
2454
2455
2456
2457
2458 file_update_time(vma->vm_file);
2459
2460 ret = __block_page_mkwrite(vma, vmf, get_block);
2461 sb_end_pagefault(sb);
2462 return block_page_mkwrite_return(ret);
2463}
2464EXPORT_SYMBOL(block_page_mkwrite);
2465
2466
2467
2468
2469
2470
2471static void end_buffer_read_nobh(struct buffer_head *bh, int uptodate)
2472{
2473 __end_buffer_read_notouch(bh, uptodate);
2474}
2475
2476
2477
2478
2479
2480
2481static void attach_nobh_buffers(struct page *page, struct buffer_head *head)
2482{
2483 struct buffer_head *bh;
2484
2485 BUG_ON(!PageLocked(page));
2486
2487 spin_lock(&page->mapping->private_lock);
2488 bh = head;
2489 do {
2490 if (PageDirty(page))
2491 set_buffer_dirty(bh);
2492 if (!bh->b_this_page)
2493 bh->b_this_page = head;
2494 bh = bh->b_this_page;
2495 } while (bh != head);
2496 attach_page_buffers(page, head);
2497 spin_unlock(&page->mapping->private_lock);
2498}
2499
2500
2501
2502
2503
2504
2505int nobh_write_begin(struct address_space *mapping,
2506 loff_t pos, unsigned len, unsigned flags,
2507 struct page **pagep, void **fsdata,
2508 get_block_t *get_block)
2509{
2510 struct inode *inode = mapping->host;
2511 const unsigned blkbits = inode->i_blkbits;
2512 const unsigned blocksize = 1 << blkbits;
2513 struct buffer_head *head, *bh;
2514 struct page *page;
2515 pgoff_t index;
2516 unsigned from, to;
2517 unsigned block_in_page;
2518 unsigned block_start, block_end;
2519 sector_t block_in_file;
2520 int nr_reads = 0;
2521 int ret = 0;
2522 int is_mapped_to_disk = 1;
2523
2524 index = pos >> PAGE_CACHE_SHIFT;
2525 from = pos & (PAGE_CACHE_SIZE - 1);
2526 to = from + len;
2527
2528 page = grab_cache_page_write_begin(mapping, index, flags);
2529 if (!page)
2530 return -ENOMEM;
2531 *pagep = page;
2532 *fsdata = NULL;
2533
2534 if (page_has_buffers(page)) {
2535 ret = __block_write_begin(page, pos, len, get_block);
2536 if (unlikely(ret))
2537 goto out_release;
2538 return ret;
2539 }
2540
2541 if (PageMappedToDisk(page))
2542 return 0;
2543
2544
2545
2546
2547
2548
2549
2550
2551
2552
2553 head = alloc_page_buffers(page, blocksize, 0);
2554 if (!head) {
2555 ret = -ENOMEM;
2556 goto out_release;
2557 }
2558
2559 block_in_file = (sector_t)page->index << (PAGE_CACHE_SHIFT - blkbits);
2560
2561
2562
2563
2564
2565
2566 for (block_start = 0, block_in_page = 0, bh = head;
2567 block_start < PAGE_CACHE_SIZE;
2568 block_in_page++, block_start += blocksize, bh = bh->b_this_page) {
2569 int create;
2570
2571 block_end = block_start + blocksize;
2572 bh->b_state = 0;
2573 create = 1;
2574 if (block_start >= to)
2575 create = 0;
2576 ret = get_block(inode, block_in_file + block_in_page,
2577 bh, create);
2578 if (ret)
2579 goto failed;
2580 if (!buffer_mapped(bh))
2581 is_mapped_to_disk = 0;
2582 if (buffer_new(bh))
2583 unmap_underlying_metadata(bh->b_bdev, bh->b_blocknr);
2584 if (PageUptodate(page)) {
2585 set_buffer_uptodate(bh);
2586 continue;
2587 }
2588 if (buffer_new(bh) || !buffer_mapped(bh)) {
2589 zero_user_segments(page, block_start, from,
2590 to, block_end);
2591 continue;
2592 }
2593 if (buffer_uptodate(bh))
2594 continue;
2595 if (block_start < from || block_end > to) {
2596 lock_buffer(bh);
2597 bh->b_end_io = end_buffer_read_nobh;
2598 submit_bh(READ, bh);
2599 nr_reads++;
2600 }
2601 }
2602
2603 if (nr_reads) {
2604
2605
2606
2607
2608
2609 for (bh = head; bh; bh = bh->b_this_page) {
2610 wait_on_buffer(bh);
2611 if (!buffer_uptodate(bh))
2612 ret = -EIO;
2613 }
2614 if (ret)
2615 goto failed;
2616 }
2617
2618 if (is_mapped_to_disk)
2619 SetPageMappedToDisk(page);
2620
2621 *fsdata = head;
2622
2623 return 0;
2624
2625failed:
2626 BUG_ON(!ret);
2627
2628
2629
2630
2631
2632
2633
2634 attach_nobh_buffers(page, head);
2635 page_zero_new_buffers(page, from, to);
2636
2637out_release:
2638 unlock_page(page);
2639 page_cache_release(page);
2640 *pagep = NULL;
2641
2642 return ret;
2643}
2644EXPORT_SYMBOL(nobh_write_begin);
2645
2646int nobh_write_end(struct file *file, struct address_space *mapping,
2647 loff_t pos, unsigned len, unsigned copied,
2648 struct page *page, void *fsdata)
2649{
2650 struct inode *inode = page->mapping->host;
2651 struct buffer_head *head = fsdata;
2652 struct buffer_head *bh;
2653 BUG_ON(fsdata != NULL && page_has_buffers(page));
2654
2655 if (unlikely(copied < len) && head)
2656 attach_nobh_buffers(page, head);
2657 if (page_has_buffers(page))
2658 return generic_write_end(file, mapping, pos, len,
2659 copied, page, fsdata);
2660
2661 SetPageUptodate(page);
2662 set_page_dirty(page);
2663 if (pos+copied > inode->i_size) {
2664 i_size_write(inode, pos+copied);
2665 mark_inode_dirty(inode);
2666 }
2667
2668 unlock_page(page);
2669 page_cache_release(page);
2670
2671 while (head) {
2672 bh = head;
2673 head = head->b_this_page;
2674 free_buffer_head(bh);
2675 }
2676
2677 return copied;
2678}
2679EXPORT_SYMBOL(nobh_write_end);
2680
2681
2682
2683
2684
2685
2686int nobh_writepage(struct page *page, get_block_t *get_block,
2687 struct writeback_control *wbc)
2688{
2689 struct inode * const inode = page->mapping->host;
2690 loff_t i_size = i_size_read(inode);
2691 const pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT;
2692 unsigned offset;
2693 int ret;
2694
2695
2696 if (page->index < end_index)
2697 goto out;
2698
2699
2700 offset = i_size & (PAGE_CACHE_SIZE-1);
2701 if (page->index >= end_index+1 || !offset) {
2702
2703
2704
2705
2706
2707#if 0
2708
2709 if (page->mapping->a_ops->invalidatepage)
2710 page->mapping->a_ops->invalidatepage(page, offset);
2711#endif
2712 unlock_page(page);
2713 return 0;
2714 }
2715
2716
2717
2718
2719
2720
2721
2722
2723 zero_user_segment(page, offset, PAGE_CACHE_SIZE);
2724out:
2725 ret = mpage_writepage(page, get_block, wbc);
2726 if (ret == -EAGAIN)
2727 ret = __block_write_full_page(inode, page, get_block, wbc,
2728 end_buffer_async_write);
2729 return ret;
2730}
2731EXPORT_SYMBOL(nobh_writepage);
2732
2733int nobh_truncate_page(struct address_space *mapping,
2734 loff_t from, get_block_t *get_block)
2735{
2736 pgoff_t index = from >> PAGE_CACHE_SHIFT;
2737 unsigned offset = from & (PAGE_CACHE_SIZE-1);
2738 unsigned blocksize;
2739 sector_t iblock;
2740 unsigned length, pos;
2741 struct inode *inode = mapping->host;
2742 struct page *page;
2743 struct buffer_head map_bh;
2744 int err;
2745
2746 blocksize = 1 << inode->i_blkbits;
2747 length = offset & (blocksize - 1);
2748
2749
2750 if (!length)
2751 return 0;
2752
2753 length = blocksize - length;
2754 iblock = (sector_t)index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
2755
2756 page = grab_cache_page(mapping, index);
2757 err = -ENOMEM;
2758 if (!page)
2759 goto out;
2760
2761 if (page_has_buffers(page)) {
2762has_buffers:
2763 unlock_page(page);
2764 page_cache_release(page);
2765 return block_truncate_page(mapping, from, get_block);
2766 }
2767
2768
2769 pos = blocksize;
2770 while (offset >= pos) {
2771 iblock++;
2772 pos += blocksize;
2773 }
2774
2775 map_bh.b_size = blocksize;
2776 map_bh.b_state = 0;
2777 err = get_block(inode, iblock, &map_bh, 0);
2778 if (err)
2779 goto unlock;
2780
2781 if (!buffer_mapped(&map_bh))
2782 goto unlock;
2783
2784
2785 if (!PageUptodate(page)) {
2786 err = mapping->a_ops->readpage(NULL, page);
2787 if (err) {
2788 page_cache_release(page);
2789 goto out;
2790 }
2791 lock_page(page);
2792 if (!PageUptodate(page)) {
2793 err = -EIO;
2794 goto unlock;
2795 }
2796 if (page_has_buffers(page))
2797 goto has_buffers;
2798 }
2799 zero_user(page, offset, length);
2800 set_page_dirty(page);
2801 err = 0;
2802
2803unlock:
2804 unlock_page(page);
2805 page_cache_release(page);
2806out:
2807 return err;
2808}
2809EXPORT_SYMBOL(nobh_truncate_page);
2810
2811int block_truncate_page(struct address_space *mapping,
2812 loff_t from, get_block_t *get_block)
2813{
2814 pgoff_t index = from >> PAGE_CACHE_SHIFT;
2815 unsigned offset = from & (PAGE_CACHE_SIZE-1);
2816 unsigned blocksize;
2817 sector_t iblock;
2818 unsigned length, pos;
2819 struct inode *inode = mapping->host;
2820 struct page *page;
2821 struct buffer_head *bh;
2822 int err;
2823
2824 blocksize = 1 << inode->i_blkbits;
2825 length = offset & (blocksize - 1);
2826
2827
2828 if (!length)
2829 return 0;
2830
2831 length = blocksize - length;
2832 iblock = (sector_t)index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
2833
2834 page = grab_cache_page(mapping, index);
2835 err = -ENOMEM;
2836 if (!page)
2837 goto out;
2838
2839 if (!page_has_buffers(page))
2840 create_empty_buffers(page, blocksize, 0);
2841
2842
2843 bh = page_buffers(page);
2844 pos = blocksize;
2845 while (offset >= pos) {
2846 bh = bh->b_this_page;
2847 iblock++;
2848 pos += blocksize;
2849 }
2850
2851 err = 0;
2852 if (!buffer_mapped(bh)) {
2853 WARN_ON(bh->b_size != blocksize);
2854 err = get_block(inode, iblock, bh, 0);
2855 if (err)
2856 goto unlock;
2857
2858 if (!buffer_mapped(bh))
2859 goto unlock;
2860 }
2861
2862
2863 if (PageUptodate(page))
2864 set_buffer_uptodate(bh);
2865
2866 if (!buffer_uptodate(bh) && !buffer_delay(bh) && !buffer_unwritten(bh)) {
2867 err = -EIO;
2868 ll_rw_block(READ, 1, &bh);
2869 wait_on_buffer(bh);
2870
2871 if (!buffer_uptodate(bh))
2872 goto unlock;
2873 }
2874
2875 zero_user(page, offset, length);
2876 mark_buffer_dirty(bh);
2877 err = 0;
2878
2879unlock:
2880 unlock_page(page);
2881 page_cache_release(page);
2882out:
2883 return err;
2884}
2885EXPORT_SYMBOL(block_truncate_page);
2886
2887
2888
2889
int block_write_full_page(struct page *page, get_block_t *get_block,
			struct writeback_control *wbc)
{
	struct inode * const inode = page->mapping->host;
	loff_t i_size = i_size_read(inode);
	const pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT;
	unsigned offset;

	/* Is the page fully inside i_size? */
	if (page->index < end_index)
		return __block_write_full_page(inode, page, get_block, wbc,
					       end_buffer_async_write);

	/* Is the page fully outside i_size? (truncate in progress) */
	offset = i_size & (PAGE_CACHE_SIZE-1);
	if (page->index >= end_index+1 || !offset) {
		/*
		 * The page may still carry dirty, unmapped buffers.
		 * Invalidate them here so the page can be freed and
		 * does not leak.
		 */
		do_invalidatepage(page, 0, PAGE_CACHE_SIZE);
		unlock_page(page);
		return 0;
	}

	/*
	 * The page straddles i_size.  It must be zeroed out on each and
	 * every writepage invocation because it may be mmapped: writes
	 * through a mapping can land beyond EOF in this page, and that
	 * region must not be written out to the file.
	 */
	zero_user_segment(page, offset, PAGE_CACHE_SIZE);
	return __block_write_full_page(inode, page, get_block, wbc,
					end_buffer_async_write);
}
EXPORT_SYMBOL(block_write_full_page);

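/*
 * Generic ->bmap implementation: map a file block to a device block by
 * calling the filesystem's get_block() on a temporary buffer_head.
 * Returns 0 for holes and unmapped blocks.
 */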
sector_t generic_block_bmap(struct address_space *mapping, sector_t block,
			    get_block_t *get_block)
{
	struct buffer_head tmp;
	struct inode *inode = mapping->host;
	tmp.b_state = 0;
	tmp.b_blocknr = 0;
	tmp.b_size = 1 << inode->i_blkbits;
	get_block(inode, block, &tmp, 0);
	return tmp.b_blocknr;
}
EXPORT_SYMBOL(generic_block_bmap);

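/*
 * Completion handler used by submit_bh(): propagate the bio's status back
 * into the buffer_head and invoke its b_end_io callback.
 */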
static void end_bio_bh_io_sync(struct bio *bio, int err)
{
	struct buffer_head *bh = bio->bi_private;

	if (err == -EOPNOTSUPP) {
		set_bit(BIO_EOPNOTSUPP, &bio->bi_flags);
	}

	if (unlikely(test_bit(BIO_QUIET, &bio->bi_flags)))
		set_bit(BH_Quiet, &bh->b_state);

	bh->b_end_io(bh, test_bit(BIO_UPTODATE, &bio->bi_flags));
	bio_put(bio);
}

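/*
 * Allow IO on the odd last sectors of a device even when the block size
 * is a multiple of the physical sector size: trim the single-segment bio
 * to the device size and, for reads, zero the tail of the buffer that
 * will not be transferred.  IO that is entirely out of range is left
 * alone so the block layer can fail it.
 */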
static void guard_bh_eod(int rw, struct bio *bio, struct buffer_head *bh)
{
	sector_t maxsector;
	unsigned bytes;

	maxsector = i_size_read(bio->bi_bdev->bd_inode) >> 9;
	if (!maxsector)
		return;

	/*
	 * If the *whole* IO is past the end of the device,
	 * let it through, and the IO layer will turn it into
	 * an EIO.
	 */
	if (unlikely(bio->bi_iter.bi_sector >= maxsector))
		return;

	maxsector -= bio->bi_iter.bi_sector;
	bytes = bio->bi_iter.bi_size;
	if (likely((bytes >> 9) <= maxsector))
		return;

	/* The bio straddles the device size: truncate it */
	bytes = maxsector << 9;

	bio->bi_iter.bi_size = bytes;
	bio->bi_io_vec[0].bv_len = bytes;

	/* ..and clear the end of the buffer for reads */
	if ((rw & RW_MASK) == READ) {
		void *kaddr = kmap_atomic(bh->b_page);
		memset(kaddr + bh_offset(bh) + bytes, 0, bh->b_size - bytes);
		kunmap_atomic(kaddr);
		flush_dcache_page(bh->b_page);
	}
}

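/*
 * _submit_bh / submit_bh - wrap a locked, mapped buffer_head in a
 * single-segment bio and hand it to the block layer.  The buffer must
 * already have b_end_io set; completion is reported through
 * end_bio_bh_io_sync().
 */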
int _submit_bh(int rw, struct buffer_head *bh, unsigned long bio_flags)
{
	struct bio *bio;
	int ret = 0;

	BUG_ON(!buffer_locked(bh));
	BUG_ON(!buffer_mapped(bh));
	BUG_ON(!bh->b_end_io);
	BUG_ON(buffer_delay(bh));
	BUG_ON(buffer_unwritten(bh));

	/*
	 * Only clear out a write error when rewriting
	 */
	if (test_set_buffer_req(bh) && (rw & WRITE))
		clear_buffer_write_io_error(bh);

	/*
	 * from here on down, it's all bio -- do the initial mapping,
	 * submit_bio -> generic_make_request may further map this bio around
	 */
	bio = bio_alloc(GFP_NOIO, 1);

	bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9);
	bio->bi_bdev = bh->b_bdev;
	bio->bi_io_vec[0].bv_page = bh->b_page;
	bio->bi_io_vec[0].bv_len = bh->b_size;
	bio->bi_io_vec[0].bv_offset = bh_offset(bh);

	bio->bi_vcnt = 1;
	bio->bi_iter.bi_size = bh->b_size;

	bio->bi_end_io = end_bio_bh_io_sync;
	bio->bi_private = bh;
	bio->bi_flags |= bio_flags;

	/* Take care of bh's that straddle the end of the device */
	guard_bh_eod(rw, bio, bh);

	if (buffer_meta(bh))
		rw |= REQ_META;
	if (buffer_prio(bh))
		rw |= REQ_PRIO;

	bio_get(bio);
	submit_bio(rw, bio);

	if (bio_flagged(bio, BIO_EOPNOTSUPP))
		ret = -EOPNOTSUPP;

	bio_put(bio);
	return ret;
}
EXPORT_SYMBOL_GPL(_submit_bh);

int submit_bh(int rw, struct buffer_head *bh)
{
	return _submit_bh(rw, bh, 0);
}
EXPORT_SYMBOL(submit_bh);

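/*
 * ll_rw_block: start read or write IO against an array of buffer_heads.
 * Buffers that are already locked are skipped, as are buffers that do
 * not need the IO (clean buffers for WRITE, uptodate buffers for READ).
 * The IO is asynchronous; callers that need completion must wait on the
 * buffers themselves.
 */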
void ll_rw_block(int rw, int nr, struct buffer_head *bhs[])
{
	int i;

	for (i = 0; i < nr; i++) {
		struct buffer_head *bh = bhs[i];

		if (!trylock_buffer(bh))
			continue;
		if (rw == WRITE) {
			if (test_clear_buffer_dirty(bh)) {
				bh->b_end_io = end_buffer_write_sync;
				get_bh(bh);
				submit_bh(WRITE, bh);
				continue;
			}
		} else {
			if (!buffer_uptodate(bh)) {
				bh->b_end_io = end_buffer_read_sync;
				get_bh(bh);
				submit_bh(rw, bh);
				continue;
			}
		}
		unlock_buffer(bh);
	}
}
EXPORT_SYMBOL(ll_rw_block);

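/*
 * Kick off an asynchronous write of a single dirty buffer.  The buffer is
 * locked here and unlocked by end_buffer_write_sync() on completion; if it
 * turns out to be clean, nothing is submitted.
 */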
void write_dirty_buffer(struct buffer_head *bh, int rw)
{
	lock_buffer(bh);
	if (!test_clear_buffer_dirty(bh)) {
		unlock_buffer(bh);
		return;
	}
	bh->b_end_io = end_buffer_write_sync;
	get_bh(bh);
	submit_bh(rw, bh);
}
EXPORT_SYMBOL(write_dirty_buffer);

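/*
 * For a data-integrity writeout we write the buffer and then wait upon
 * the IO, returning -EIO if the buffer did not come back uptodate.  The
 * caller must hold a reference on the buffer_head.
 */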
int __sync_dirty_buffer(struct buffer_head *bh, int rw)
{
	int ret = 0;

	WARN_ON(atomic_read(&bh->b_count) < 1);
	lock_buffer(bh);
	if (test_clear_buffer_dirty(bh)) {
		get_bh(bh);
		bh->b_end_io = end_buffer_write_sync;
		ret = submit_bh(rw, bh);
		wait_on_buffer(bh);
		if (!ret && !buffer_uptodate(bh))
			ret = -EIO;
	} else {
		unlock_buffer(bh);
	}
	return ret;
}
EXPORT_SYMBOL(__sync_dirty_buffer);

int sync_dirty_buffer(struct buffer_head *bh)
{
	return __sync_dirty_buffer(bh, WRITE_SYNC);
}
EXPORT_SYMBOL(sync_dirty_buffer);

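/*
 * try_to_free_buffers() checks whether all buffers on the page are clean,
 * unlocked and unreferenced; if so it detaches them from the page and
 * frees them.  buffer_busy() and drop_buffers() below are its helpers.
 */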
static inline int buffer_busy(struct buffer_head *bh)
{
	return atomic_read(&bh->b_count) |
		(bh->b_state & ((1 << BH_Dirty) | (1 << BH_Lock)));
}

static int
drop_buffers(struct page *page, struct buffer_head **buffers_to_free)
{
	struct buffer_head *head = page_buffers(page);
	struct buffer_head *bh;

	bh = head;
	do {
		if (buffer_write_io_error(bh) && page->mapping)
			set_bit(AS_EIO, &page->mapping->flags);
		if (buffer_busy(bh))
			goto failed;
		bh = bh->b_this_page;
	} while (bh != head);

	do {
		struct buffer_head *next = bh->b_this_page;

		if (bh->b_assoc_map)
			__remove_assoc_queue(bh);
		bh = next;
	} while (bh != head);
	*buffers_to_free = head;
	__clear_page_buffers(page);
	return 1;
failed:
	return 0;
}

int try_to_free_buffers(struct page *page)
{
	struct address_space * const mapping = page->mapping;
	struct buffer_head *buffers_to_free = NULL;
	int ret = 0;

	BUG_ON(!PageLocked(page));
	if (PageWriteback(page))
		return 0;

	if (mapping == NULL) {
		ret = drop_buffers(page, &buffers_to_free);
		goto out;
	}

	spin_lock(&mapping->private_lock);
	ret = drop_buffers(page, &buffers_to_free);

	/*
	 * If the filesystem writes its buffers by hand (eg ext3) then we
	 * can have clean buffers against a dirty page.  We clean the page
	 * here; otherwise the VM would never notice that the filesystem
	 * did any IO at all.
	 *
	 * Also, during truncate, discard_buffer will have marked all the
	 * page's buffers clean.  We discover that here and clean the page
	 * too.
	 *
	 * private_lock must be held over this entire operation in order to
	 * synchronise against __set_page_dirty_buffers and prevent the
	 * dirty bit from being lost.
	 */
	if (ret)
		cancel_dirty_page(page, PAGE_CACHE_SIZE);
	spin_unlock(&mapping->private_lock);
out:
	if (buffers_to_free) {
		struct buffer_head *bh = buffers_to_free;

		do {
			struct buffer_head *next = bh->b_this_page;
			free_buffer_head(bh);
			bh = next;
		} while (bh != buffers_to_free);
	}
	return ret;
}
EXPORT_SYMBOL(try_to_free_buffers);

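/*
 * There are no bdflush tunables left.  The syscall is kept only so that
 * old userspace flush daemons do not break: calling it prints a
 * rate-limited warning, and func == 1 still terminates the caller.
 */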
SYSCALL_DEFINE2(bdflush, int, func, long, data)
{
	static int msg_count;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	if (msg_count < 5) {
		msg_count++;
		printk(KERN_INFO
			"warning: process `%s' used the obsolete bdflush"
			" system call\n", current->comm);
		printk(KERN_INFO "Fix your initscripts?\n");
	}

	if (func == 1)
		do_exit(0);
	return 0;
}

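/* Buffer-head allocation */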
static struct kmem_cache *bh_cachep __read_mostly;

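/*
 * Once the number of buffer_heads in the machine exceeds this level,
 * buffer_heads_over_limit is raised and reclaim starts stripping them.
 */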
static unsigned long max_buffer_heads;

int buffer_heads_over_limit;

struct bh_accounting {
	int nr;			/* Number of live bh's */
	int ratelimit;		/* Limit cacheline bouncing */
};

static DEFINE_PER_CPU(struct bh_accounting, bh_accounting) = {0, 0};

static void recalc_bh_state(void)
{
	int i;
	int tot = 0;

	if (__this_cpu_inc_return(bh_accounting.ratelimit) - 1 < 4096)
		return;
	__this_cpu_write(bh_accounting.ratelimit, 0);
	for_each_online_cpu(i)
		tot += per_cpu(bh_accounting, i).nr;
	buffer_heads_over_limit = (tot > max_buffer_heads);
}

struct buffer_head *alloc_buffer_head(gfp_t gfp_flags)
{
	struct buffer_head *ret = kmem_cache_zalloc(bh_cachep, gfp_flags);
	if (ret) {
		INIT_LIST_HEAD(&ret->b_assoc_buffers);
		preempt_disable();
		__this_cpu_inc(bh_accounting.nr);
		recalc_bh_state();
		preempt_enable();
	}
	return ret;
}
EXPORT_SYMBOL(alloc_buffer_head);

void free_buffer_head(struct buffer_head *bh)
{
	BUG_ON(!list_empty(&bh->b_assoc_buffers));
	kmem_cache_free(bh_cachep, bh);
	preempt_disable();
	__this_cpu_dec(bh_accounting.nr);
	recalc_bh_state();
	preempt_enable();
}
EXPORT_SYMBOL(free_buffer_head);

static void buffer_exit_cpu(int cpu)
{
	int i;
	struct bh_lru *b = &per_cpu(bh_lrus, cpu);

	for (i = 0; i < BH_LRU_SIZE; i++) {
		brelse(b->bhs[i]);
		b->bhs[i] = NULL;
	}
	this_cpu_add(bh_accounting.nr, per_cpu(bh_accounting, cpu).nr);
	per_cpu(bh_accounting, cpu).nr = 0;
}

static int buffer_cpu_notify(struct notifier_block *self,
			     unsigned long action, void *hcpu)
{
	if (action == CPU_DEAD || action == CPU_DEAD_FROZEN)
		buffer_exit_cpu((unsigned long)hcpu);
	return NOTIFY_OK;
}

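/*
 * bh_uptodate_or_lock: return 1 if the buffer is uptodate; otherwise
 * return 0 with the buffer locked, ready for the caller to read it in.
 */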
int bh_uptodate_or_lock(struct buffer_head *bh)
{
	if (!buffer_uptodate(bh)) {
		lock_buffer(bh);
		if (!buffer_uptodate(bh))
			return 0;
		unlock_buffer(bh);
	}
	return 1;
}
EXPORT_SYMBOL(bh_uptodate_or_lock);

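/*
 * bh_submit_read: submit a locked, non-uptodate buffer for reading and
 * wait for it.  Returns 0 on success and -EIO on error; the buffer is
 * unlocked by the IO completion handler either way.
 */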
int bh_submit_read(struct buffer_head *bh)
{
	BUG_ON(!buffer_locked(bh));

	if (buffer_uptodate(bh)) {
		unlock_buffer(bh);
		return 0;
	}

	get_bh(bh);
	bh->b_end_io = end_buffer_read_sync;
	submit_bh(READ, bh);
	wait_on_buffer(bh);
	if (buffer_uptodate(bh))
		return 0;
	return -EIO;
}
EXPORT_SYMBOL(bh_submit_read);

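/*
 * Set up the buffer_head slab cache, size the global buffer-head limit,
 * and register the CPU hotplug notifier that drains the per-cpu state.
 */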
void __init buffer_init(void)
{
	unsigned long nrpages;

	bh_cachep = kmem_cache_create("buffer_head",
			sizeof(struct buffer_head), 0,
				(SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|
				SLAB_MEM_SPREAD),
				NULL);

	/*
	 * Limit the bh occupancy to 10% of ZONE_NORMAL
	 */
	nrpages = (nr_free_buffer_pages() * 10) / 100;
	max_buffer_heads = nrpages * (PAGE_SIZE / sizeof(struct buffer_head));
	hotcpu_notifier(buffer_cpu_notify, 0);
}