1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21#include <linux/kernel.h>
22#include <linux/syscalls.h>
23#include <linux/fs.h>
24#include <linux/mm.h>
25#include <linux/percpu.h>
26#include <linux/slab.h>
27#include <linux/capability.h>
28#include <linux/blkdev.h>
29#include <linux/file.h>
30#include <linux/quotaops.h>
31#include <linux/highmem.h>
32#include <linux/export.h>
33#include <linux/writeback.h>
34#include <linux/hash.h>
35#include <linux/suspend.h>
36#include <linux/buffer_head.h>
37#include <linux/task_io_accounting_ops.h>
38#include <linux/bio.h>
39#include <linux/notifier.h>
40#include <linux/cpu.h>
41#include <linux/bitops.h>
42#include <linux/mpage.h>
43#include <linux/bit_spinlock.h>
44#include <trace/events/block.h>
45
46static int fsync_buffers_list(spinlock_t *lock, struct list_head *list);
47
48#define BH_ENTRY(list) list_entry((list), struct buffer_head, b_assoc_buffers)
49
50void init_buffer(struct buffer_head *bh, bh_end_io_t *handler, void *private)
51{
52 bh->b_end_io = handler;
53 bh->b_private = private;
54}
55EXPORT_SYMBOL(init_buffer);
56
57inline void touch_buffer(struct buffer_head *bh)
58{
59 trace_block_touch_buffer(bh);
60 mark_page_accessed(bh->b_page);
61}
62EXPORT_SYMBOL(touch_buffer);
63
64void __lock_buffer(struct buffer_head *bh)
65{
66 wait_on_bit_lock_io(&bh->b_state, BH_Lock, TASK_UNINTERRUPTIBLE);
67}
68EXPORT_SYMBOL(__lock_buffer);
69
70void unlock_buffer(struct buffer_head *bh)
71{
72 clear_bit_unlock(BH_Lock, &bh->b_state);
73 smp_mb__after_atomic();
74 wake_up_bit(&bh->b_state, BH_Lock);
75}
76EXPORT_SYMBOL(unlock_buffer);
77
78
79
80
81
82
83void buffer_check_dirty_writeback(struct page *page,
84 bool *dirty, bool *writeback)
85{
86 struct buffer_head *head, *bh;
87 *dirty = false;
88 *writeback = false;
89
90 BUG_ON(!PageLocked(page));
91
92 if (!page_has_buffers(page))
93 return;
94
95 if (PageWriteback(page))
96 *writeback = true;
97
98 head = page_buffers(page);
99 bh = head;
100 do {
101 if (buffer_locked(bh))
102 *writeback = true;
103
104 if (buffer_dirty(bh))
105 *dirty = true;
106
107 bh = bh->b_this_page;
108 } while (bh != head);
109}
110EXPORT_SYMBOL(buffer_check_dirty_writeback);
111
112
113
114
115
116
117void __wait_on_buffer(struct buffer_head * bh)
118{
119 wait_on_bit_io(&bh->b_state, BH_Lock, TASK_UNINTERRUPTIBLE);
120}
121EXPORT_SYMBOL(__wait_on_buffer);
122
123static void
124__clear_page_buffers(struct page *page)
125{
126 ClearPagePrivate(page);
127 set_page_private(page, 0);
128 page_cache_release(page);
129}
130
131static void buffer_io_error(struct buffer_head *bh, char *msg)
132{
133 char b[BDEVNAME_SIZE];
134
135 if (!test_bit(BH_Quiet, &bh->b_state))
136 printk_ratelimited(KERN_ERR
137 "Buffer I/O error on dev %s, logical block %llu%s\n",
138 bdevname(bh->b_bdev, b),
139 (unsigned long long)bh->b_blocknr, msg);
140}
141
142
143
144
145
146
147
148
149
150static void __end_buffer_read_notouch(struct buffer_head *bh, int uptodate)
151{
152 if (uptodate) {
153 set_buffer_uptodate(bh);
154 } else {
155
156 clear_buffer_uptodate(bh);
157 }
158 unlock_buffer(bh);
159}
160
161
162
163
164
165void end_buffer_read_sync(struct buffer_head *bh, int uptodate)
166{
167 __end_buffer_read_notouch(bh, uptodate);
168 put_bh(bh);
169}
170EXPORT_SYMBOL(end_buffer_read_sync);
171
172void end_buffer_write_sync(struct buffer_head *bh, int uptodate)
173{
174 if (uptodate) {
175 set_buffer_uptodate(bh);
176 } else {
177 buffer_io_error(bh, ", lost sync page write");
178 set_buffer_write_io_error(bh);
179 clear_buffer_uptodate(bh);
180 }
181 unlock_buffer(bh);
182 put_bh(bh);
183}
184EXPORT_SYMBOL(end_buffer_write_sync);
185
186
187
188
189
190
191
192
193
194
195
196
197static struct buffer_head *
198__find_get_block_slow(struct block_device *bdev, sector_t block)
199{
200 struct inode *bd_inode = bdev->bd_inode;
201 struct address_space *bd_mapping = bd_inode->i_mapping;
202 struct buffer_head *ret = NULL;
203 pgoff_t index;
204 struct buffer_head *bh;
205 struct buffer_head *head;
206 struct page *page;
207 int all_mapped = 1;
208
209 index = block >> (PAGE_CACHE_SHIFT - bd_inode->i_blkbits);
210 page = find_get_page_flags(bd_mapping, index, FGP_ACCESSED);
211 if (!page)
212 goto out;
213
214 spin_lock(&bd_mapping->private_lock);
215 if (!page_has_buffers(page))
216 goto out_unlock;
217 head = page_buffers(page);
218 bh = head;
219 do {
220 if (!buffer_mapped(bh))
221 all_mapped = 0;
222 else if (bh->b_blocknr == block) {
223 ret = bh;
224 get_bh(bh);
225 goto out_unlock;
226 }
227 bh = bh->b_this_page;
228 } while (bh != head);
229
230
231
232
233
234
235 if (all_mapped) {
236 char b[BDEVNAME_SIZE];
237
238 printk("__find_get_block_slow() failed. "
239 "block=%llu, b_blocknr=%llu\n",
240 (unsigned long long)block,
241 (unsigned long long)bh->b_blocknr);
242 printk("b_state=0x%08lx, b_size=%zu\n",
243 bh->b_state, bh->b_size);
244 printk("device %s blocksize: %d\n", bdevname(bdev, b),
245 1 << bd_inode->i_blkbits);
246 }
247out_unlock:
248 spin_unlock(&bd_mapping->private_lock);
249 page_cache_release(page);
250out:
251 return ret;
252}
253
254
255
256
257static void free_more_memory(void)
258{
259 struct zone *zone;
260 int nid;
261
262 wakeup_flusher_threads(1024, WB_REASON_FREE_MORE_MEM);
263 yield();
264
265 for_each_online_node(nid) {
266 (void)first_zones_zonelist(node_zonelist(nid, GFP_NOFS),
267 gfp_zone(GFP_NOFS), NULL,
268 &zone);
269 if (zone)
270 try_to_free_pages(node_zonelist(nid, GFP_NOFS), 0,
271 GFP_NOFS, NULL);
272 }
273}
274
275
276
277
278
279static void end_buffer_async_read(struct buffer_head *bh, int uptodate)
280{
281 unsigned long flags;
282 struct buffer_head *first;
283 struct buffer_head *tmp;
284 struct page *page;
285 int page_uptodate = 1;
286
287 BUG_ON(!buffer_async_read(bh));
288
289 page = bh->b_page;
290 if (uptodate) {
291 set_buffer_uptodate(bh);
292 } else {
293 clear_buffer_uptodate(bh);
294 buffer_io_error(bh, ", async page read");
295 SetPageError(page);
296 }
297
298
299
300
301
302
303 first = page_buffers(page);
304 local_irq_save(flags);
305 bit_spin_lock(BH_Uptodate_Lock, &first->b_state);
306 clear_buffer_async_read(bh);
307 unlock_buffer(bh);
308 tmp = bh;
309 do {
310 if (!buffer_uptodate(tmp))
311 page_uptodate = 0;
312 if (buffer_async_read(tmp)) {
313 BUG_ON(!buffer_locked(tmp));
314 goto still_busy;
315 }
316 tmp = tmp->b_this_page;
317 } while (tmp != bh);
318 bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
319 local_irq_restore(flags);
320
321
322
323
324
325 if (page_uptodate && !PageError(page))
326 SetPageUptodate(page);
327 unlock_page(page);
328 return;
329
330still_busy:
331 bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
332 local_irq_restore(flags);
333 return;
334}
335
336
337
338
339
340void end_buffer_async_write(struct buffer_head *bh, int uptodate)
341{
342 unsigned long flags;
343 struct buffer_head *first;
344 struct buffer_head *tmp;
345 struct page *page;
346
347 BUG_ON(!buffer_async_write(bh));
348
349 page = bh->b_page;
350 if (uptodate) {
351 set_buffer_uptodate(bh);
352 } else {
353 buffer_io_error(bh, ", lost async page write");
354 set_bit(AS_EIO, &page->mapping->flags);
355 set_buffer_write_io_error(bh);
356 clear_buffer_uptodate(bh);
357 SetPageError(page);
358 }
359
360 first = page_buffers(page);
361 local_irq_save(flags);
362 bit_spin_lock(BH_Uptodate_Lock, &first->b_state);
363
364 clear_buffer_async_write(bh);
365 unlock_buffer(bh);
366 tmp = bh->b_this_page;
367 while (tmp != bh) {
368 if (buffer_async_write(tmp)) {
369 BUG_ON(!buffer_locked(tmp));
370 goto still_busy;
371 }
372 tmp = tmp->b_this_page;
373 }
374 bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
375 local_irq_restore(flags);
376 end_page_writeback(page);
377 return;
378
379still_busy:
380 bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
381 local_irq_restore(flags);
382 return;
383}
384EXPORT_SYMBOL(end_buffer_async_write);
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407static void mark_buffer_async_read(struct buffer_head *bh)
408{
409 bh->b_end_io = end_buffer_async_read;
410 set_buffer_async_read(bh);
411}
412
413static void mark_buffer_async_write_endio(struct buffer_head *bh,
414 bh_end_io_t *handler)
415{
416 bh->b_end_io = handler;
417 set_buffer_async_write(bh);
418}
419
420void mark_buffer_async_write(struct buffer_head *bh)
421{
422 mark_buffer_async_write_endio(bh, end_buffer_async_write);
423}
424EXPORT_SYMBOL(mark_buffer_async_write);
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479static void __remove_assoc_queue(struct buffer_head *bh)
480{
481 list_del_init(&bh->b_assoc_buffers);
482 WARN_ON(!bh->b_assoc_map);
483 if (buffer_write_io_error(bh))
484 set_bit(AS_EIO, &bh->b_assoc_map->flags);
485 bh->b_assoc_map = NULL;
486}
487
488int inode_has_buffers(struct inode *inode)
489{
490 return !list_empty(&inode->i_data.private_list);
491}
492
493
494
495
496
497
498
499
500
501
502
503static int osync_buffers_list(spinlock_t *lock, struct list_head *list)
504{
505 struct buffer_head *bh;
506 struct list_head *p;
507 int err = 0;
508
509 spin_lock(lock);
510repeat:
511 list_for_each_prev(p, list) {
512 bh = BH_ENTRY(p);
513 if (buffer_locked(bh)) {
514 get_bh(bh);
515 spin_unlock(lock);
516 wait_on_buffer(bh);
517 if (!buffer_uptodate(bh))
518 err = -EIO;
519 brelse(bh);
520 spin_lock(lock);
521 goto repeat;
522 }
523 }
524 spin_unlock(lock);
525 return err;
526}
527
528static void do_thaw_one(struct super_block *sb, void *unused)
529{
530 char b[BDEVNAME_SIZE];
531 while (sb->s_bdev && !thaw_bdev(sb->s_bdev, sb))
532 printk(KERN_WARNING "Emergency Thaw on %s\n",
533 bdevname(sb->s_bdev, b));
534}
535
536static void do_thaw_all(struct work_struct *work)
537{
538 iterate_supers(do_thaw_one, NULL);
539 kfree(work);
540 printk(KERN_WARNING "Emergency Thaw complete\n");
541}
542
543
544
545
546
547
548void emergency_thaw_all(void)
549{
550 struct work_struct *work;
551
552 work = kmalloc(sizeof(*work), GFP_ATOMIC);
553 if (work) {
554 INIT_WORK(work, do_thaw_all);
555 schedule_work(work);
556 }
557}
558
559
560
561
562
563
564
565
566
567
568
569
570int sync_mapping_buffers(struct address_space *mapping)
571{
572 struct address_space *buffer_mapping = mapping->private_data;
573
574 if (buffer_mapping == NULL || list_empty(&mapping->private_list))
575 return 0;
576
577 return fsync_buffers_list(&buffer_mapping->private_lock,
578 &mapping->private_list);
579}
580EXPORT_SYMBOL(sync_mapping_buffers);
581
582
583
584
585
586
587
588void write_boundary_block(struct block_device *bdev,
589 sector_t bblock, unsigned blocksize)
590{
591 struct buffer_head *bh = __find_get_block(bdev, bblock + 1, blocksize);
592 if (bh) {
593 if (buffer_dirty(bh))
594 ll_rw_block(WRITE, 1, &bh);
595 put_bh(bh);
596 }
597}
598
599void mark_buffer_dirty_inode(struct buffer_head *bh, struct inode *inode)
600{
601 struct address_space *mapping = inode->i_mapping;
602 struct address_space *buffer_mapping = bh->b_page->mapping;
603
604 mark_buffer_dirty(bh);
605 if (!mapping->private_data) {
606 mapping->private_data = buffer_mapping;
607 } else {
608 BUG_ON(mapping->private_data != buffer_mapping);
609 }
610 if (!bh->b_assoc_map) {
611 spin_lock(&buffer_mapping->private_lock);
612 list_move_tail(&bh->b_assoc_buffers,
613 &mapping->private_list);
614 bh->b_assoc_map = mapping;
615 spin_unlock(&buffer_mapping->private_lock);
616 }
617}
618EXPORT_SYMBOL(mark_buffer_dirty_inode);
619
620
621
622
623
624
625
626
627static void __set_page_dirty(struct page *page,
628 struct address_space *mapping, int warn)
629{
630 unsigned long flags;
631
632 spin_lock_irqsave(&mapping->tree_lock, flags);
633 if (page->mapping) {
634 WARN_ON_ONCE(warn && !PageUptodate(page));
635 account_page_dirtied(page, mapping);
636 radix_tree_tag_set(&mapping->page_tree,
637 page_index(page), PAGECACHE_TAG_DIRTY);
638 }
639 spin_unlock_irqrestore(&mapping->tree_lock, flags);
640 __mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
641}
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668int __set_page_dirty_buffers(struct page *page)
669{
670 int newly_dirty;
671 struct address_space *mapping = page_mapping(page);
672
673 if (unlikely(!mapping))
674 return !TestSetPageDirty(page);
675
676 spin_lock(&mapping->private_lock);
677 if (page_has_buffers(page)) {
678 struct buffer_head *head = page_buffers(page);
679 struct buffer_head *bh = head;
680
681 do {
682 set_buffer_dirty(bh);
683 bh = bh->b_this_page;
684 } while (bh != head);
685 }
686 newly_dirty = !TestSetPageDirty(page);
687 spin_unlock(&mapping->private_lock);
688
689 if (newly_dirty)
690 __set_page_dirty(page, mapping, 1);
691 return newly_dirty;
692}
693EXPORT_SYMBOL(__set_page_dirty_buffers);
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714static int fsync_buffers_list(spinlock_t *lock, struct list_head *list)
715{
716 struct buffer_head *bh;
717 struct list_head tmp;
718 struct address_space *mapping;
719 int err = 0, err2;
720 struct blk_plug plug;
721
722 INIT_LIST_HEAD(&tmp);
723 blk_start_plug(&plug);
724
725 spin_lock(lock);
726 while (!list_empty(list)) {
727 bh = BH_ENTRY(list->next);
728 mapping = bh->b_assoc_map;
729 __remove_assoc_queue(bh);
730
731
732 smp_mb();
733 if (buffer_dirty(bh) || buffer_locked(bh)) {
734 list_add(&bh->b_assoc_buffers, &tmp);
735 bh->b_assoc_map = mapping;
736 if (buffer_dirty(bh)) {
737 get_bh(bh);
738 spin_unlock(lock);
739
740
741
742
743
744
745
746 write_dirty_buffer(bh, WRITE_SYNC);
747
748
749
750
751
752
753
754 brelse(bh);
755 spin_lock(lock);
756 }
757 }
758 }
759
760 spin_unlock(lock);
761 blk_finish_plug(&plug);
762 spin_lock(lock);
763
764 while (!list_empty(&tmp)) {
765 bh = BH_ENTRY(tmp.prev);
766 get_bh(bh);
767 mapping = bh->b_assoc_map;
768 __remove_assoc_queue(bh);
769
770
771 smp_mb();
772 if (buffer_dirty(bh)) {
773 list_add(&bh->b_assoc_buffers,
774 &mapping->private_list);
775 bh->b_assoc_map = mapping;
776 }
777 spin_unlock(lock);
778 wait_on_buffer(bh);
779 if (!buffer_uptodate(bh))
780 err = -EIO;
781 brelse(bh);
782 spin_lock(lock);
783 }
784
785 spin_unlock(lock);
786 err2 = osync_buffers_list(lock, list);
787 if (err)
788 return err;
789 else
790 return err2;
791}
792
793
794
795
796
797
798
799
800
801
802void invalidate_inode_buffers(struct inode *inode)
803{
804 if (inode_has_buffers(inode)) {
805 struct address_space *mapping = &inode->i_data;
806 struct list_head *list = &mapping->private_list;
807 struct address_space *buffer_mapping = mapping->private_data;
808
809 spin_lock(&buffer_mapping->private_lock);
810 while (!list_empty(list))
811 __remove_assoc_queue(BH_ENTRY(list->next));
812 spin_unlock(&buffer_mapping->private_lock);
813 }
814}
815EXPORT_SYMBOL(invalidate_inode_buffers);
816
817
818
819
820
821
822
823int remove_inode_buffers(struct inode *inode)
824{
825 int ret = 1;
826
827 if (inode_has_buffers(inode)) {
828 struct address_space *mapping = &inode->i_data;
829 struct list_head *list = &mapping->private_list;
830 struct address_space *buffer_mapping = mapping->private_data;
831
832 spin_lock(&buffer_mapping->private_lock);
833 while (!list_empty(list)) {
834 struct buffer_head *bh = BH_ENTRY(list->next);
835 if (buffer_dirty(bh)) {
836 ret = 0;
837 break;
838 }
839 __remove_assoc_queue(bh);
840 }
841 spin_unlock(&buffer_mapping->private_lock);
842 }
843 return ret;
844}
845
846
847
848
849
850
851
852
853
854
855struct buffer_head *alloc_page_buffers(struct page *page, unsigned long size,
856 int retry)
857{
858 struct buffer_head *bh, *head;
859 long offset;
860
861try_again:
862 head = NULL;
863 offset = PAGE_SIZE;
864 while ((offset -= size) >= 0) {
865 bh = alloc_buffer_head(GFP_NOFS);
866 if (!bh)
867 goto no_grow;
868
869 bh->b_this_page = head;
870 bh->b_blocknr = -1;
871 head = bh;
872
873 bh->b_size = size;
874
875
876 set_bh_page(bh, page, offset);
877 }
878 return head;
879
880
881
882no_grow:
883 if (head) {
884 do {
885 bh = head;
886 head = head->b_this_page;
887 free_buffer_head(bh);
888 } while (head);
889 }
890
891
892
893
894
895
896
897 if (!retry)
898 return NULL;
899
900
901
902
903
904
905
906 free_more_memory();
907 goto try_again;
908}
909EXPORT_SYMBOL_GPL(alloc_page_buffers);
910
911static inline void
912link_dev_buffers(struct page *page, struct buffer_head *head)
913{
914 struct buffer_head *bh, *tail;
915
916 bh = head;
917 do {
918 tail = bh;
919 bh = bh->b_this_page;
920 } while (bh);
921 tail->b_this_page = head;
922 attach_page_buffers(page, head);
923}
924
925static sector_t blkdev_max_block(struct block_device *bdev, unsigned int size)
926{
927 sector_t retval = ~((sector_t)0);
928 loff_t sz = i_size_read(bdev->bd_inode);
929
930 if (sz) {
931 unsigned int sizebits = blksize_bits(size);
932 retval = (sz >> sizebits);
933 }
934 return retval;
935}
936
937
938
939
940static sector_t
941init_page_buffers(struct page *page, struct block_device *bdev,
942 sector_t block, int size)
943{
944 struct buffer_head *head = page_buffers(page);
945 struct buffer_head *bh = head;
946 int uptodate = PageUptodate(page);
947 sector_t end_block = blkdev_max_block(I_BDEV(bdev->bd_inode), size);
948
949 do {
950 if (!buffer_mapped(bh)) {
951 init_buffer(bh, NULL, NULL);
952 bh->b_bdev = bdev;
953 bh->b_blocknr = block;
954 if (uptodate)
955 set_buffer_uptodate(bh);
956 if (block < end_block)
957 set_buffer_mapped(bh);
958 }
959 block++;
960 bh = bh->b_this_page;
961 } while (bh != head);
962
963
964
965
966 return end_block;
967}
968
969
970
971
972
973
974static int
975grow_dev_page(struct block_device *bdev, sector_t block,
976 pgoff_t index, int size, int sizebits, gfp_t gfp)
977{
978 struct inode *inode = bdev->bd_inode;
979 struct page *page;
980 struct buffer_head *bh;
981 sector_t end_block;
982 int ret = 0;
983 gfp_t gfp_mask;
984
985 gfp_mask = (mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS) | gfp;
986
987
988
989
990
991
992
993 gfp_mask |= __GFP_NOFAIL;
994
995 page = find_or_create_page(inode->i_mapping, index, gfp_mask);
996 if (!page)
997 return ret;
998
999 BUG_ON(!PageLocked(page));
1000
1001 if (page_has_buffers(page)) {
1002 bh = page_buffers(page);
1003 if (bh->b_size == size) {
1004 end_block = init_page_buffers(page, bdev,
1005 (sector_t)index << sizebits,
1006 size);
1007 goto done;
1008 }
1009 if (!try_to_free_buffers(page))
1010 goto failed;
1011 }
1012
1013
1014
1015
1016 bh = alloc_page_buffers(page, size, 0);
1017 if (!bh)
1018 goto failed;
1019
1020
1021
1022
1023
1024
1025 spin_lock(&inode->i_mapping->private_lock);
1026 link_dev_buffers(page, bh);
1027 end_block = init_page_buffers(page, bdev, (sector_t)index << sizebits,
1028 size);
1029 spin_unlock(&inode->i_mapping->private_lock);
1030done:
1031 ret = (block < end_block) ? 1 : -ENXIO;
1032failed:
1033 unlock_page(page);
1034 page_cache_release(page);
1035 return ret;
1036}
1037
1038
1039
1040
1041
1042static int
1043grow_buffers(struct block_device *bdev, sector_t block, int size, gfp_t gfp)
1044{
1045 pgoff_t index;
1046 int sizebits;
1047
1048 sizebits = -1;
1049 do {
1050 sizebits++;
1051 } while ((size << sizebits) < PAGE_SIZE);
1052
1053 index = block >> sizebits;
1054
1055
1056
1057
1058
1059 if (unlikely(index != block >> sizebits)) {
1060 char b[BDEVNAME_SIZE];
1061
1062 printk(KERN_ERR "%s: requested out-of-range block %llu for "
1063 "device %s\n",
1064 __func__, (unsigned long long)block,
1065 bdevname(bdev, b));
1066 return -EIO;
1067 }
1068
1069
1070 return grow_dev_page(bdev, block, index, size, sizebits, gfp);
1071}
1072
1073struct buffer_head *
1074__getblk_slow(struct block_device *bdev, sector_t block,
1075 unsigned size, gfp_t gfp)
1076{
1077
1078 if (unlikely(size & (bdev_logical_block_size(bdev)-1) ||
1079 (size < 512 || size > PAGE_SIZE))) {
1080 printk(KERN_ERR "getblk(): invalid block size %d requested\n",
1081 size);
1082 printk(KERN_ERR "logical block size: %d\n",
1083 bdev_logical_block_size(bdev));
1084
1085 dump_stack();
1086 return NULL;
1087 }
1088
1089 for (;;) {
1090 struct buffer_head *bh;
1091 int ret;
1092
1093 bh = __find_get_block(bdev, block, size);
1094 if (bh)
1095 return bh;
1096
1097 ret = grow_buffers(bdev, block, size, gfp);
1098 if (ret < 0)
1099 return NULL;
1100 if (ret == 0)
1101 free_more_memory();
1102 }
1103}
1104EXPORT_SYMBOL(__getblk_slow);
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141void mark_buffer_dirty(struct buffer_head *bh)
1142{
1143 WARN_ON_ONCE(!buffer_uptodate(bh));
1144
1145 trace_block_dirty_buffer(bh);
1146
1147
1148
1149
1150
1151
1152
1153 if (buffer_dirty(bh)) {
1154 smp_mb();
1155 if (buffer_dirty(bh))
1156 return;
1157 }
1158
1159 if (!test_set_buffer_dirty(bh)) {
1160 struct page *page = bh->b_page;
1161 if (!TestSetPageDirty(page)) {
1162 struct address_space *mapping = page_mapping(page);
1163 if (mapping)
1164 __set_page_dirty(page, mapping, 0);
1165 }
1166 }
1167}
1168EXPORT_SYMBOL(mark_buffer_dirty);
1169
1170
1171
1172
1173
1174
1175
1176
1177void __brelse(struct buffer_head * buf)
1178{
1179 if (atomic_read(&buf->b_count)) {
1180 put_bh(buf);
1181 return;
1182 }
1183 WARN(1, KERN_ERR "VFS: brelse: Trying to free free buffer\n");
1184}
1185EXPORT_SYMBOL(__brelse);
1186
1187
1188
1189
1190
1191void __bforget(struct buffer_head *bh)
1192{
1193 clear_buffer_dirty(bh);
1194 if (bh->b_assoc_map) {
1195 struct address_space *buffer_mapping = bh->b_page->mapping;
1196
1197 spin_lock(&buffer_mapping->private_lock);
1198 list_del_init(&bh->b_assoc_buffers);
1199 bh->b_assoc_map = NULL;
1200 spin_unlock(&buffer_mapping->private_lock);
1201 }
1202 __brelse(bh);
1203}
1204EXPORT_SYMBOL(__bforget);
1205
1206static struct buffer_head *__bread_slow(struct buffer_head *bh)
1207{
1208 lock_buffer(bh);
1209 if (buffer_uptodate(bh)) {
1210 unlock_buffer(bh);
1211 return bh;
1212 } else {
1213 get_bh(bh);
1214 bh->b_end_io = end_buffer_read_sync;
1215 submit_bh(READ, bh);
1216 wait_on_buffer(bh);
1217 if (buffer_uptodate(bh))
1218 return bh;
1219 }
1220 brelse(bh);
1221 return NULL;
1222}
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238#define BH_LRU_SIZE 16
1239
1240struct bh_lru {
1241 struct buffer_head *bhs[BH_LRU_SIZE];
1242};
1243
1244static DEFINE_PER_CPU(struct bh_lru, bh_lrus) = {{ NULL }};
1245
1246#ifdef CONFIG_SMP
1247#define bh_lru_lock() local_irq_disable()
1248#define bh_lru_unlock() local_irq_enable()
1249#else
1250#define bh_lru_lock() preempt_disable()
1251#define bh_lru_unlock() preempt_enable()
1252#endif
1253
1254static inline void check_irqs_on(void)
1255{
1256#ifdef irqs_disabled
1257 BUG_ON(irqs_disabled());
1258#endif
1259}
1260
1261
1262
1263
1264static void bh_lru_install(struct buffer_head *bh)
1265{
1266 struct buffer_head *evictee = NULL;
1267
1268 check_irqs_on();
1269 bh_lru_lock();
1270 if (__this_cpu_read(bh_lrus.bhs[0]) != bh) {
1271 struct buffer_head *bhs[BH_LRU_SIZE];
1272 int in;
1273 int out = 0;
1274
1275 get_bh(bh);
1276 bhs[out++] = bh;
1277 for (in = 0; in < BH_LRU_SIZE; in++) {
1278 struct buffer_head *bh2 =
1279 __this_cpu_read(bh_lrus.bhs[in]);
1280
1281 if (bh2 == bh) {
1282 __brelse(bh2);
1283 } else {
1284 if (out >= BH_LRU_SIZE) {
1285 BUG_ON(evictee != NULL);
1286 evictee = bh2;
1287 } else {
1288 bhs[out++] = bh2;
1289 }
1290 }
1291 }
1292 while (out < BH_LRU_SIZE)
1293 bhs[out++] = NULL;
1294 memcpy(this_cpu_ptr(&bh_lrus.bhs), bhs, sizeof(bhs));
1295 }
1296 bh_lru_unlock();
1297
1298 if (evictee)
1299 __brelse(evictee);
1300}
1301
1302
1303
1304
1305static struct buffer_head *
1306lookup_bh_lru(struct block_device *bdev, sector_t block, unsigned size)
1307{
1308 struct buffer_head *ret = NULL;
1309 unsigned int i;
1310
1311 check_irqs_on();
1312 bh_lru_lock();
1313 for (i = 0; i < BH_LRU_SIZE; i++) {
1314 struct buffer_head *bh = __this_cpu_read(bh_lrus.bhs[i]);
1315
1316 if (bh && bh->b_blocknr == block && bh->b_bdev == bdev &&
1317 bh->b_size == size) {
1318 if (i) {
1319 while (i) {
1320 __this_cpu_write(bh_lrus.bhs[i],
1321 __this_cpu_read(bh_lrus.bhs[i - 1]));
1322 i--;
1323 }
1324 __this_cpu_write(bh_lrus.bhs[0], bh);
1325 }
1326 get_bh(bh);
1327 ret = bh;
1328 break;
1329 }
1330 }
1331 bh_lru_unlock();
1332 return ret;
1333}
1334
1335
1336
1337
1338
1339
1340struct buffer_head *
1341__find_get_block(struct block_device *bdev, sector_t block, unsigned size)
1342{
1343 struct buffer_head *bh = lookup_bh_lru(bdev, block, size);
1344
1345 if (bh == NULL) {
1346
1347 bh = __find_get_block_slow(bdev, block);
1348 if (bh)
1349 bh_lru_install(bh);
1350 } else
1351 touch_buffer(bh);
1352
1353 return bh;
1354}
1355EXPORT_SYMBOL(__find_get_block);
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365struct buffer_head *
1366__getblk_gfp(struct block_device *bdev, sector_t block,
1367 unsigned size, gfp_t gfp)
1368{
1369 struct buffer_head *bh = __find_get_block(bdev, block, size);
1370
1371 might_sleep();
1372 if (bh == NULL)
1373 bh = __getblk_slow(bdev, block, size, gfp);
1374 return bh;
1375}
1376EXPORT_SYMBOL(__getblk_gfp);
1377
1378
1379
1380
1381void __breadahead(struct block_device *bdev, sector_t block, unsigned size)
1382{
1383 struct buffer_head *bh = __getblk(bdev, block, size);
1384 if (likely(bh)) {
1385 ll_rw_block(READA, 1, &bh);
1386 brelse(bh);
1387 }
1388}
1389EXPORT_SYMBOL(__breadahead);
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403struct buffer_head *
1404__bread_gfp(struct block_device *bdev, sector_t block,
1405 unsigned size, gfp_t gfp)
1406{
1407 struct buffer_head *bh = __getblk_gfp(bdev, block, size, gfp);
1408
1409 if (likely(bh) && !buffer_uptodate(bh))
1410 bh = __bread_slow(bh);
1411 return bh;
1412}
1413EXPORT_SYMBOL(__bread_gfp);
1414
1415
1416
1417
1418
1419
1420static void invalidate_bh_lru(void *arg)
1421{
1422 struct bh_lru *b = &get_cpu_var(bh_lrus);
1423 int i;
1424
1425 for (i = 0; i < BH_LRU_SIZE; i++) {
1426 brelse(b->bhs[i]);
1427 b->bhs[i] = NULL;
1428 }
1429 put_cpu_var(bh_lrus);
1430}
1431
1432static bool has_bh_in_lru(int cpu, void *dummy)
1433{
1434 struct bh_lru *b = per_cpu_ptr(&bh_lrus, cpu);
1435 int i;
1436
1437 for (i = 0; i < BH_LRU_SIZE; i++) {
1438 if (b->bhs[i])
1439 return 1;
1440 }
1441
1442 return 0;
1443}
1444
1445void invalidate_bh_lrus(void)
1446{
1447 on_each_cpu_cond(has_bh_in_lru, invalidate_bh_lru, NULL, 1, GFP_KERNEL);
1448}
1449EXPORT_SYMBOL_GPL(invalidate_bh_lrus);
1450
1451void set_bh_page(struct buffer_head *bh,
1452 struct page *page, unsigned long offset)
1453{
1454 bh->b_page = page;
1455 BUG_ON(offset >= PAGE_SIZE);
1456 if (PageHighMem(page))
1457
1458
1459
1460 bh->b_data = (char *)(0 + offset);
1461 else
1462 bh->b_data = page_address(page) + offset;
1463}
1464EXPORT_SYMBOL(set_bh_page);
1465
1466
1467
1468
1469
1470
1471#define BUFFER_FLAGS_DISCARD \
1472 (1 << BH_Mapped | 1 << BH_New | 1 << BH_Req | \
1473 1 << BH_Delay | 1 << BH_Unwritten)
1474
1475static void discard_buffer(struct buffer_head * bh)
1476{
1477 unsigned long b_state, b_state_old;
1478
1479 lock_buffer(bh);
1480 clear_buffer_dirty(bh);
1481 bh->b_bdev = NULL;
1482 b_state = bh->b_state;
1483 for (;;) {
1484 b_state_old = cmpxchg(&bh->b_state, b_state,
1485 (b_state & ~BUFFER_FLAGS_DISCARD));
1486 if (b_state_old == b_state)
1487 break;
1488 b_state = b_state_old;
1489 }
1490 unlock_buffer(bh);
1491}
1492
1493
1494
1495
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508
1509void block_invalidatepage(struct page *page, unsigned int offset,
1510 unsigned int length)
1511{
1512 struct buffer_head *head, *bh, *next;
1513 unsigned int curr_off = 0;
1514 unsigned int stop = length + offset;
1515
1516 BUG_ON(!PageLocked(page));
1517 if (!page_has_buffers(page))
1518 goto out;
1519
1520
1521
1522
1523 BUG_ON(stop > PAGE_CACHE_SIZE || stop < length);
1524
1525 head = page_buffers(page);
1526 bh = head;
1527 do {
1528 unsigned int next_off = curr_off + bh->b_size;
1529 next = bh->b_this_page;
1530
1531
1532
1533
1534 if (next_off > stop)
1535 goto out;
1536
1537
1538
1539
1540 if (offset <= curr_off)
1541 discard_buffer(bh);
1542 curr_off = next_off;
1543 bh = next;
1544 } while (bh != head);
1545
1546
1547
1548
1549
1550
1551 if (offset == 0)
1552 try_to_release_page(page, 0);
1553out:
1554 return;
1555}
1556EXPORT_SYMBOL(block_invalidatepage);
1557
1558
1559
1560
1561
1562
1563
1564void create_empty_buffers(struct page *page,
1565 unsigned long blocksize, unsigned long b_state)
1566{
1567 struct buffer_head *bh, *head, *tail;
1568
1569 head = alloc_page_buffers(page, blocksize, 1);
1570 bh = head;
1571 do {
1572 bh->b_state |= b_state;
1573 tail = bh;
1574 bh = bh->b_this_page;
1575 } while (bh);
1576 tail->b_this_page = head;
1577
1578 spin_lock(&page->mapping->private_lock);
1579 if (PageUptodate(page) || PageDirty(page)) {
1580 bh = head;
1581 do {
1582 if (PageDirty(page))
1583 set_buffer_dirty(bh);
1584 if (PageUptodate(page))
1585 set_buffer_uptodate(bh);
1586 bh = bh->b_this_page;
1587 } while (bh != head);
1588 }
1589 attach_page_buffers(page, head);
1590 spin_unlock(&page->mapping->private_lock);
1591}
1592EXPORT_SYMBOL(create_empty_buffers);
1593
1594
1595
1596
1597
1598
1599
1600
1601
1602
1603
1604
1605
1606
1607
1608
1609
1610void unmap_underlying_metadata(struct block_device *bdev, sector_t block)
1611{
1612 struct buffer_head *old_bh;
1613
1614 might_sleep();
1615
1616 old_bh = __find_get_block_slow(bdev, block);
1617 if (old_bh) {
1618 clear_buffer_dirty(old_bh);
1619 wait_on_buffer(old_bh);
1620 clear_buffer_req(old_bh);
1621 __brelse(old_bh);
1622 }
1623}
1624EXPORT_SYMBOL(unmap_underlying_metadata);
1625
1626
1627
1628
1629
1630
1631
1632
1633
1634static inline int block_size_bits(unsigned int blocksize)
1635{
1636 return ilog2(blocksize);
1637}
1638
1639static struct buffer_head *create_page_buffers(struct page *page, struct inode *inode, unsigned int b_state)
1640{
1641 BUG_ON(!PageLocked(page));
1642
1643 if (!page_has_buffers(page))
1644 create_empty_buffers(page, 1 << ACCESS_ONCE(inode->i_blkbits), b_state);
1645 return page_buffers(page);
1646}
1647
1648
1649
1650
1651
1652
1653
1654
1655
1656
1657
1658
1659
1660
1661
1662
1663
1664
1665
1666
1667
1668
1669
1670
1671
1672
1673
1674
1675
1676
1677static int __block_write_full_page(struct inode *inode, struct page *page,
1678 get_block_t *get_block, struct writeback_control *wbc,
1679 bh_end_io_t *handler)
1680{
1681 int err;
1682 sector_t block;
1683 sector_t last_block;
1684 struct buffer_head *bh, *head;
1685 unsigned int blocksize, bbits;
1686 int nr_underway = 0;
1687 int write_op = (wbc->sync_mode == WB_SYNC_ALL ?
1688 WRITE_SYNC : WRITE);
1689
1690 head = create_page_buffers(page, inode,
1691 (1 << BH_Dirty)|(1 << BH_Uptodate));
1692
1693
1694
1695
1696
1697
1698
1699
1700
1701
1702
1703 bh = head;
1704 blocksize = bh->b_size;
1705 bbits = block_size_bits(blocksize);
1706
1707 block = (sector_t)page->index << (PAGE_CACHE_SHIFT - bbits);
1708 last_block = (i_size_read(inode) - 1) >> bbits;
1709
1710
1711
1712
1713
1714 do {
1715 if (block > last_block) {
1716
1717
1718
1719
1720
1721
1722
1723
1724 clear_buffer_dirty(bh);
1725 set_buffer_uptodate(bh);
1726 } else if ((!buffer_mapped(bh) || buffer_delay(bh)) &&
1727 buffer_dirty(bh)) {
1728 WARN_ON(bh->b_size != blocksize);
1729 err = get_block(inode, block, bh, 1);
1730 if (err)
1731 goto recover;
1732 clear_buffer_delay(bh);
1733 if (buffer_new(bh)) {
1734
1735 clear_buffer_new(bh);
1736 unmap_underlying_metadata(bh->b_bdev,
1737 bh->b_blocknr);
1738 }
1739 }
1740 bh = bh->b_this_page;
1741 block++;
1742 } while (bh != head);
1743
1744 do {
1745 if (!buffer_mapped(bh))
1746 continue;
1747
1748
1749
1750
1751
1752
1753
1754 if (wbc->sync_mode != WB_SYNC_NONE) {
1755 lock_buffer(bh);
1756 } else if (!trylock_buffer(bh)) {
1757 redirty_page_for_writepage(wbc, page);
1758 continue;
1759 }
1760 if (test_clear_buffer_dirty(bh)) {
1761 mark_buffer_async_write_endio(bh, handler);
1762 } else {
1763 unlock_buffer(bh);
1764 }
1765 } while ((bh = bh->b_this_page) != head);
1766
1767
1768
1769
1770
1771 BUG_ON(PageWriteback(page));
1772 set_page_writeback(page);
1773
1774 do {
1775 struct buffer_head *next = bh->b_this_page;
1776 if (buffer_async_write(bh)) {
1777 submit_bh(write_op, bh);
1778 nr_underway++;
1779 }
1780 bh = next;
1781 } while (bh != head);
1782 unlock_page(page);
1783
1784 err = 0;
1785done:
1786 if (nr_underway == 0) {
1787
1788
1789
1790
1791
1792 end_page_writeback(page);
1793
1794
1795
1796
1797
1798 }
1799 return err;
1800
1801recover:
1802
1803
1804
1805
1806
1807
1808 bh = head;
1809
1810 do {
1811 if (buffer_mapped(bh) && buffer_dirty(bh) &&
1812 !buffer_delay(bh)) {
1813 lock_buffer(bh);
1814 mark_buffer_async_write_endio(bh, handler);
1815 } else {
1816
1817
1818
1819
1820 clear_buffer_dirty(bh);
1821 }
1822 } while ((bh = bh->b_this_page) != head);
1823 SetPageError(page);
1824 BUG_ON(PageWriteback(page));
1825 mapping_set_error(page->mapping, err);
1826 set_page_writeback(page);
1827 do {
1828 struct buffer_head *next = bh->b_this_page;
1829 if (buffer_async_write(bh)) {
1830 clear_buffer_dirty(bh);
1831 submit_bh(write_op, bh);
1832 nr_underway++;
1833 }
1834 bh = next;
1835 } while (bh != head);
1836 unlock_page(page);
1837 goto done;
1838}
1839
1840
1841
1842
1843
1844
1845void page_zero_new_buffers(struct page *page, unsigned from, unsigned to)
1846{
1847 unsigned int block_start, block_end;
1848 struct buffer_head *head, *bh;
1849
1850 BUG_ON(!PageLocked(page));
1851 if (!page_has_buffers(page))
1852 return;
1853
1854 bh = head = page_buffers(page);
1855 block_start = 0;
1856 do {
1857 block_end = block_start + bh->b_size;
1858
1859 if (buffer_new(bh)) {
1860 if (block_end > from && block_start < to) {
1861 if (!PageUptodate(page)) {
1862 unsigned start, size;
1863
1864 start = max(from, block_start);
1865 size = min(to, block_end) - start;
1866
1867 zero_user(page, start, size);
1868 set_buffer_uptodate(bh);
1869 }
1870
1871 clear_buffer_new(bh);
1872 mark_buffer_dirty(bh);
1873 }
1874 }
1875
1876 block_start = block_end;
1877 bh = bh->b_this_page;
1878 } while (bh != head);
1879}
1880EXPORT_SYMBOL(page_zero_new_buffers);
1881
1882int __block_write_begin(struct page *page, loff_t pos, unsigned len,
1883 get_block_t *get_block)
1884{
1885 unsigned from = pos & (PAGE_CACHE_SIZE - 1);
1886 unsigned to = from + len;
1887 struct inode *inode = page->mapping->host;
1888 unsigned block_start, block_end;
1889 sector_t block;
1890 int err = 0;
1891 unsigned blocksize, bbits;
1892 struct buffer_head *bh, *head, *wait[2], **wait_bh=wait;
1893
1894 BUG_ON(!PageLocked(page));
1895 BUG_ON(from > PAGE_CACHE_SIZE);
1896 BUG_ON(to > PAGE_CACHE_SIZE);
1897 BUG_ON(from > to);
1898
1899 head = create_page_buffers(page, inode, 0);
1900 blocksize = head->b_size;
1901 bbits = block_size_bits(blocksize);
1902
1903 block = (sector_t)page->index << (PAGE_CACHE_SHIFT - bbits);
1904
1905 for(bh = head, block_start = 0; bh != head || !block_start;
1906 block++, block_start=block_end, bh = bh->b_this_page) {
1907 block_end = block_start + blocksize;
1908 if (block_end <= from || block_start >= to) {
1909 if (PageUptodate(page)) {
1910 if (!buffer_uptodate(bh))
1911 set_buffer_uptodate(bh);
1912 }
1913 continue;
1914 }
1915 if (buffer_new(bh))
1916 clear_buffer_new(bh);
1917 if (!buffer_mapped(bh)) {
1918 WARN_ON(bh->b_size != blocksize);
1919 err = get_block(inode, block, bh, 1);
1920 if (err)
1921 break;
1922 if (buffer_new(bh)) {
1923 unmap_underlying_metadata(bh->b_bdev,
1924 bh->b_blocknr);
1925 if (PageUptodate(page)) {
1926 clear_buffer_new(bh);
1927 set_buffer_uptodate(bh);
1928 mark_buffer_dirty(bh);
1929 continue;
1930 }
1931 if (block_end > to || block_start < from)
1932 zero_user_segments(page,
1933 to, block_end,
1934 block_start, from);
1935 continue;
1936 }
1937 }
1938 if (PageUptodate(page)) {
1939 if (!buffer_uptodate(bh))
1940 set_buffer_uptodate(bh);
1941 continue;
1942 }
1943 if (!buffer_uptodate(bh) && !buffer_delay(bh) &&
1944 !buffer_unwritten(bh) &&
1945 (block_start < from || block_end > to)) {
1946 ll_rw_block(READ, 1, &bh);
1947 *wait_bh++=bh;
1948 }
1949 }
1950
1951
1952
1953 while(wait_bh > wait) {
1954 wait_on_buffer(*--wait_bh);
1955 if (!buffer_uptodate(*wait_bh))
1956 err = -EIO;
1957 }
1958 if (unlikely(err))
1959 page_zero_new_buffers(page, from, to);
1960 return err;
1961}
1962EXPORT_SYMBOL(__block_write_begin);
1963
1964static int __block_commit_write(struct inode *inode, struct page *page,
1965 unsigned from, unsigned to)
1966{
1967 unsigned block_start, block_end;
1968 int partial = 0;
1969 unsigned blocksize;
1970 struct buffer_head *bh, *head;
1971
1972 bh = head = page_buffers(page);
1973 blocksize = bh->b_size;
1974
1975 block_start = 0;
1976 do {
1977 block_end = block_start + blocksize;
1978 if (block_end <= from || block_start >= to) {
1979 if (!buffer_uptodate(bh))
1980 partial = 1;
1981 } else {
1982 set_buffer_uptodate(bh);
1983 mark_buffer_dirty(bh);
1984 }
1985 clear_buffer_new(bh);
1986
1987 block_start = block_end;
1988 bh = bh->b_this_page;
1989 } while (bh != head);
1990
1991
1992
1993
1994
1995
1996
1997 if (!partial)
1998 SetPageUptodate(page);
1999 return 0;
2000}
2001
2002
2003
2004
2005
2006
2007
2008int block_write_begin(struct address_space *mapping, loff_t pos, unsigned len,
2009 unsigned flags, struct page **pagep, get_block_t *get_block)
2010{
2011 pgoff_t index = pos >> PAGE_CACHE_SHIFT;
2012 struct page *page;
2013 int status;
2014
2015 page = grab_cache_page_write_begin(mapping, index, flags);
2016 if (!page)
2017 return -ENOMEM;
2018
2019 status = __block_write_begin(page, pos, len, get_block);
2020 if (unlikely(status)) {
2021 unlock_page(page);
2022 page_cache_release(page);
2023 page = NULL;
2024 }
2025
2026 *pagep = page;
2027 return status;
2028}
2029EXPORT_SYMBOL(block_write_begin);
2030
2031int block_write_end(struct file *file, struct address_space *mapping,
2032 loff_t pos, unsigned len, unsigned copied,
2033 struct page *page, void *fsdata)
2034{
2035 struct inode *inode = mapping->host;
2036 unsigned start;
2037
2038 start = pos & (PAGE_CACHE_SIZE - 1);
2039
2040 if (unlikely(copied < len)) {
2041
2042
2043
2044
2045
2046
2047
2048
2049
2050
2051
2052
2053 if (!PageUptodate(page))
2054 copied = 0;
2055
2056 page_zero_new_buffers(page, start+copied, start+len);
2057 }
2058 flush_dcache_page(page);
2059
2060
2061 __block_commit_write(inode, page, start, start+copied);
2062
2063 return copied;
2064}
2065EXPORT_SYMBOL(block_write_end);
2066
2067int generic_write_end(struct file *file, struct address_space *mapping,
2068 loff_t pos, unsigned len, unsigned copied,
2069 struct page *page, void *fsdata)
2070{
2071 struct inode *inode = mapping->host;
2072 loff_t old_size = inode->i_size;
2073 int i_size_changed = 0;
2074
2075 copied = block_write_end(file, mapping, pos, len, copied, page, fsdata);
2076
2077
2078
2079
2080
2081
2082
2083
2084 if (pos+copied > inode->i_size) {
2085 i_size_write(inode, pos+copied);
2086 i_size_changed = 1;
2087 }
2088
2089 unlock_page(page);
2090 page_cache_release(page);
2091
2092 if (old_size < pos)
2093 pagecache_isize_extended(inode, old_size, pos);
2094
2095
2096
2097
2098
2099
2100 if (i_size_changed)
2101 mark_inode_dirty(inode);
2102
2103 return copied;
2104}
2105EXPORT_SYMBOL(generic_write_end);
2106
2107
2108
2109
2110
2111
2112
2113
2114int block_is_partially_uptodate(struct page *page, unsigned long from,
2115 unsigned long count)
2116{
2117 unsigned block_start, block_end, blocksize;
2118 unsigned to;
2119 struct buffer_head *bh, *head;
2120 int ret = 1;
2121
2122 if (!page_has_buffers(page))
2123 return 0;
2124
2125 head = page_buffers(page);
2126 blocksize = head->b_size;
2127 to = min_t(unsigned, PAGE_CACHE_SIZE - from, count);
2128 to = from + to;
2129 if (from < blocksize && to > PAGE_CACHE_SIZE - blocksize)
2130 return 0;
2131
2132 bh = head;
2133 block_start = 0;
2134 do {
2135 block_end = block_start + blocksize;
2136 if (block_end > from && block_start < to) {
2137 if (!buffer_uptodate(bh)) {
2138 ret = 0;
2139 break;
2140 }
2141 if (block_end >= to)
2142 break;
2143 }
2144 block_start = block_end;
2145 bh = bh->b_this_page;
2146 } while (bh != head);
2147
2148 return ret;
2149}
2150EXPORT_SYMBOL(block_is_partially_uptodate);
2151
2152
2153
2154
2155
2156
2157
2158
2159int block_read_full_page(struct page *page, get_block_t *get_block)
2160{
2161 struct inode *inode = page->mapping->host;
2162 sector_t iblock, lblock;
2163 struct buffer_head *bh, *head, *arr[MAX_BUF_PER_PAGE];
2164 unsigned int blocksize, bbits;
2165 int nr, i;
2166 int fully_mapped = 1;
2167
2168 head = create_page_buffers(page, inode, 0);
2169 blocksize = head->b_size;
2170 bbits = block_size_bits(blocksize);
2171
2172 iblock = (sector_t)page->index << (PAGE_CACHE_SHIFT - bbits);
2173 lblock = (i_size_read(inode)+blocksize-1) >> bbits;
2174 bh = head;
2175 nr = 0;
2176 i = 0;
2177
2178 do {
2179 if (buffer_uptodate(bh))
2180 continue;
2181
2182 if (!buffer_mapped(bh)) {
2183 int err = 0;
2184
2185 fully_mapped = 0;
2186 if (iblock < lblock) {
2187 WARN_ON(bh->b_size != blocksize);
2188 err = get_block(inode, iblock, bh, 0);
2189 if (err)
2190 SetPageError(page);
2191 }
2192 if (!buffer_mapped(bh)) {
2193 zero_user(page, i * blocksize, blocksize);
2194 if (!err)
2195 set_buffer_uptodate(bh);
2196 continue;
2197 }
2198
2199
2200
2201
2202 if (buffer_uptodate(bh))
2203 continue;
2204 }
2205 arr[nr++] = bh;
2206 } while (i++, iblock++, (bh = bh->b_this_page) != head);
2207
2208 if (fully_mapped)
2209 SetPageMappedToDisk(page);
2210
2211 if (!nr) {
2212
2213
2214
2215
2216 if (!PageError(page))
2217 SetPageUptodate(page);
2218 unlock_page(page);
2219 return 0;
2220 }
2221
2222
2223 for (i = 0; i < nr; i++) {
2224 bh = arr[i];
2225 lock_buffer(bh);
2226 mark_buffer_async_read(bh);
2227 }
2228
2229
2230
2231
2232
2233
2234 for (i = 0; i < nr; i++) {
2235 bh = arr[i];
2236 if (buffer_uptodate(bh))
2237 end_buffer_async_read(bh, 1);
2238 else
2239 submit_bh(READ, bh);
2240 }
2241 return 0;
2242}
2243EXPORT_SYMBOL(block_read_full_page);
2244
2245
2246
2247
2248
2249int generic_cont_expand_simple(struct inode *inode, loff_t size)
2250{
2251 struct address_space *mapping = inode->i_mapping;
2252 struct page *page;
2253 void *fsdata;
2254 int err;
2255
2256 err = inode_newsize_ok(inode, size);
2257 if (err)
2258 goto out;
2259
2260 err = pagecache_write_begin(NULL, mapping, size, 0,
2261 AOP_FLAG_UNINTERRUPTIBLE|AOP_FLAG_CONT_EXPAND,
2262 &page, &fsdata);
2263 if (err)
2264 goto out;
2265
2266 err = pagecache_write_end(NULL, mapping, size, 0, 0, page, fsdata);
2267 BUG_ON(err > 0);
2268
2269out:
2270 return err;
2271}
2272EXPORT_SYMBOL(generic_cont_expand_simple);
2273
2274static int cont_expand_zero(struct file *file, struct address_space *mapping,
2275 loff_t pos, loff_t *bytes)
2276{
2277 struct inode *inode = mapping->host;
2278 unsigned blocksize = 1 << inode->i_blkbits;
2279 struct page *page;
2280 void *fsdata;
2281 pgoff_t index, curidx;
2282 loff_t curpos;
2283 unsigned zerofrom, offset, len;
2284 int err = 0;
2285
2286 index = pos >> PAGE_CACHE_SHIFT;
2287 offset = pos & ~PAGE_CACHE_MASK;
2288
2289 while (index > (curidx = (curpos = *bytes)>>PAGE_CACHE_SHIFT)) {
2290 zerofrom = curpos & ~PAGE_CACHE_MASK;
2291 if (zerofrom & (blocksize-1)) {
2292 *bytes |= (blocksize-1);
2293 (*bytes)++;
2294 }
2295 len = PAGE_CACHE_SIZE - zerofrom;
2296
2297 err = pagecache_write_begin(file, mapping, curpos, len,
2298 AOP_FLAG_UNINTERRUPTIBLE,
2299 &page, &fsdata);
2300 if (err)
2301 goto out;
2302 zero_user(page, zerofrom, len);
2303 err = pagecache_write_end(file, mapping, curpos, len, len,
2304 page, fsdata);
2305 if (err < 0)
2306 goto out;
2307 BUG_ON(err != len);
2308 err = 0;
2309
2310 balance_dirty_pages_ratelimited(mapping);
2311
2312 if (unlikely(fatal_signal_pending(current))) {
2313 err = -EINTR;
2314 goto out;
2315 }
2316 }
2317
2318
2319 if (index == curidx) {
2320 zerofrom = curpos & ~PAGE_CACHE_MASK;
2321
2322 if (offset <= zerofrom) {
2323 goto out;
2324 }
2325 if (zerofrom & (blocksize-1)) {
2326 *bytes |= (blocksize-1);
2327 (*bytes)++;
2328 }
2329 len = offset - zerofrom;
2330
2331 err = pagecache_write_begin(file, mapping, curpos, len,
2332 AOP_FLAG_UNINTERRUPTIBLE,
2333 &page, &fsdata);
2334 if (err)
2335 goto out;
2336 zero_user(page, zerofrom, len);
2337 err = pagecache_write_end(file, mapping, curpos, len, len,
2338 page, fsdata);
2339 if (err < 0)
2340 goto out;
2341 BUG_ON(err != len);
2342 err = 0;
2343 }
2344out:
2345 return err;
2346}
2347
2348
2349
2350
2351
2352int cont_write_begin(struct file *file, struct address_space *mapping,
2353 loff_t pos, unsigned len, unsigned flags,
2354 struct page **pagep, void **fsdata,
2355 get_block_t *get_block, loff_t *bytes)
2356{
2357 struct inode *inode = mapping->host;
2358 unsigned blocksize = 1 << inode->i_blkbits;
2359 unsigned zerofrom;
2360 int err;
2361
2362 err = cont_expand_zero(file, mapping, pos, bytes);
2363 if (err)
2364 return err;
2365
2366 zerofrom = *bytes & ~PAGE_CACHE_MASK;
2367 if (pos+len > *bytes && zerofrom & (blocksize-1)) {
2368 *bytes |= (blocksize-1);
2369 (*bytes)++;
2370 }
2371
2372 return block_write_begin(mapping, pos, len, flags, pagep, get_block);
2373}
2374EXPORT_SYMBOL(cont_write_begin);
2375
2376int block_commit_write(struct page *page, unsigned from, unsigned to)
2377{
2378 struct inode *inode = page->mapping->host;
2379 __block_commit_write(inode,page,from,to);
2380 return 0;
2381}
2382EXPORT_SYMBOL(block_commit_write);
2383
2384
2385
2386
2387
2388
2389
2390
2391
2392
2393
2394
2395
2396
2397
2398
2399
2400
2401
2402int __block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
2403 get_block_t get_block)
2404{
2405 struct page *page = vmf->page;
2406 struct inode *inode = file_inode(vma->vm_file);
2407 unsigned long end;
2408 loff_t size;
2409 int ret;
2410
2411 lock_page(page);
2412 size = i_size_read(inode);
2413 if ((page->mapping != inode->i_mapping) ||
2414 (page_offset(page) > size)) {
2415
2416 ret = -EFAULT;
2417 goto out_unlock;
2418 }
2419
2420
2421 if (((page->index + 1) << PAGE_CACHE_SHIFT) > size)
2422 end = size & ~PAGE_CACHE_MASK;
2423 else
2424 end = PAGE_CACHE_SIZE;
2425
2426 ret = __block_write_begin(page, 0, end, get_block);
2427 if (!ret)
2428 ret = block_commit_write(page, 0, end);
2429
2430 if (unlikely(ret < 0))
2431 goto out_unlock;
2432 set_page_dirty(page);
2433 wait_for_stable_page(page);
2434 return 0;
2435out_unlock:
2436 unlock_page(page);
2437 return ret;
2438}
2439EXPORT_SYMBOL(__block_page_mkwrite);
2440
2441int block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
2442 get_block_t get_block)
2443{
2444 int ret;
2445 struct super_block *sb = file_inode(vma->vm_file)->i_sb;
2446
2447 sb_start_pagefault(sb);
2448
2449
2450
2451
2452
2453 file_update_time(vma->vm_file);
2454
2455 ret = __block_page_mkwrite(vma, vmf, get_block);
2456 sb_end_pagefault(sb);
2457 return block_page_mkwrite_return(ret);
2458}
2459EXPORT_SYMBOL(block_page_mkwrite);
2460
2461
2462
2463
2464
2465
2466static void end_buffer_read_nobh(struct buffer_head *bh, int uptodate)
2467{
2468 __end_buffer_read_notouch(bh, uptodate);
2469}
2470
2471
2472
2473
2474
2475
2476static void attach_nobh_buffers(struct page *page, struct buffer_head *head)
2477{
2478 struct buffer_head *bh;
2479
2480 BUG_ON(!PageLocked(page));
2481
2482 spin_lock(&page->mapping->private_lock);
2483 bh = head;
2484 do {
2485 if (PageDirty(page))
2486 set_buffer_dirty(bh);
2487 if (!bh->b_this_page)
2488 bh->b_this_page = head;
2489 bh = bh->b_this_page;
2490 } while (bh != head);
2491 attach_page_buffers(page, head);
2492 spin_unlock(&page->mapping->private_lock);
2493}
2494
2495
2496
2497
2498
2499
2500int nobh_write_begin(struct address_space *mapping,
2501 loff_t pos, unsigned len, unsigned flags,
2502 struct page **pagep, void **fsdata,
2503 get_block_t *get_block)
2504{
2505 struct inode *inode = mapping->host;
2506 const unsigned blkbits = inode->i_blkbits;
2507 const unsigned blocksize = 1 << blkbits;
2508 struct buffer_head *head, *bh;
2509 struct page *page;
2510 pgoff_t index;
2511 unsigned from, to;
2512 unsigned block_in_page;
2513 unsigned block_start, block_end;
2514 sector_t block_in_file;
2515 int nr_reads = 0;
2516 int ret = 0;
2517 int is_mapped_to_disk = 1;
2518
2519 index = pos >> PAGE_CACHE_SHIFT;
2520 from = pos & (PAGE_CACHE_SIZE - 1);
2521 to = from + len;
2522
2523 page = grab_cache_page_write_begin(mapping, index, flags);
2524 if (!page)
2525 return -ENOMEM;
2526 *pagep = page;
2527 *fsdata = NULL;
2528
2529 if (page_has_buffers(page)) {
2530 ret = __block_write_begin(page, pos, len, get_block);
2531 if (unlikely(ret))
2532 goto out_release;
2533 return ret;
2534 }
2535
2536 if (PageMappedToDisk(page))
2537 return 0;
2538
2539
2540
2541
2542
2543
2544
2545
2546
2547
2548 head = alloc_page_buffers(page, blocksize, 0);
2549 if (!head) {
2550 ret = -ENOMEM;
2551 goto out_release;
2552 }
2553
2554 block_in_file = (sector_t)page->index << (PAGE_CACHE_SHIFT - blkbits);
2555
2556
2557
2558
2559
2560
2561 for (block_start = 0, block_in_page = 0, bh = head;
2562 block_start < PAGE_CACHE_SIZE;
2563 block_in_page++, block_start += blocksize, bh = bh->b_this_page) {
2564 int create;
2565
2566 block_end = block_start + blocksize;
2567 bh->b_state = 0;
2568 create = 1;
2569 if (block_start >= to)
2570 create = 0;
2571 ret = get_block(inode, block_in_file + block_in_page,
2572 bh, create);
2573 if (ret)
2574 goto failed;
2575 if (!buffer_mapped(bh))
2576 is_mapped_to_disk = 0;
2577 if (buffer_new(bh))
2578 unmap_underlying_metadata(bh->b_bdev, bh->b_blocknr);
2579 if (PageUptodate(page)) {
2580 set_buffer_uptodate(bh);
2581 continue;
2582 }
2583 if (buffer_new(bh) || !buffer_mapped(bh)) {
2584 zero_user_segments(page, block_start, from,
2585 to, block_end);
2586 continue;
2587 }
2588 if (buffer_uptodate(bh))
2589 continue;
2590 if (block_start < from || block_end > to) {
2591 lock_buffer(bh);
2592 bh->b_end_io = end_buffer_read_nobh;
2593 submit_bh(READ, bh);
2594 nr_reads++;
2595 }
2596 }
2597
2598 if (nr_reads) {
2599
2600
2601
2602
2603
2604 for (bh = head; bh; bh = bh->b_this_page) {
2605 wait_on_buffer(bh);
2606 if (!buffer_uptodate(bh))
2607 ret = -EIO;
2608 }
2609 if (ret)
2610 goto failed;
2611 }
2612
2613 if (is_mapped_to_disk)
2614 SetPageMappedToDisk(page);
2615
2616 *fsdata = head;
2617
2618 return 0;
2619
2620failed:
2621 BUG_ON(!ret);
2622
2623
2624
2625
2626
2627
2628
2629 attach_nobh_buffers(page, head);
2630 page_zero_new_buffers(page, from, to);
2631
2632out_release:
2633 unlock_page(page);
2634 page_cache_release(page);
2635 *pagep = NULL;
2636
2637 return ret;
2638}
2639EXPORT_SYMBOL(nobh_write_begin);
2640
2641int nobh_write_end(struct file *file, struct address_space *mapping,
2642 loff_t pos, unsigned len, unsigned copied,
2643 struct page *page, void *fsdata)
2644{
2645 struct inode *inode = page->mapping->host;
2646 struct buffer_head *head = fsdata;
2647 struct buffer_head *bh;
2648 BUG_ON(fsdata != NULL && page_has_buffers(page));
2649
2650 if (unlikely(copied < len) && head)
2651 attach_nobh_buffers(page, head);
2652 if (page_has_buffers(page))
2653 return generic_write_end(file, mapping, pos, len,
2654 copied, page, fsdata);
2655
2656 SetPageUptodate(page);
2657 set_page_dirty(page);
2658 if (pos+copied > inode->i_size) {
2659 i_size_write(inode, pos+copied);
2660 mark_inode_dirty(inode);
2661 }
2662
2663 unlock_page(page);
2664 page_cache_release(page);
2665
2666 while (head) {
2667 bh = head;
2668 head = head->b_this_page;
2669 free_buffer_head(bh);
2670 }
2671
2672 return copied;
2673}
2674EXPORT_SYMBOL(nobh_write_end);
2675
2676
2677
2678
2679
2680
2681int nobh_writepage(struct page *page, get_block_t *get_block,
2682 struct writeback_control *wbc)
2683{
2684 struct inode * const inode = page->mapping->host;
2685 loff_t i_size = i_size_read(inode);
2686 const pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT;
2687 unsigned offset;
2688 int ret;
2689
2690
2691 if (page->index < end_index)
2692 goto out;
2693
2694
2695 offset = i_size & (PAGE_CACHE_SIZE-1);
2696 if (page->index >= end_index+1 || !offset) {
2697
2698
2699
2700
2701
2702#if 0
2703
2704 if (page->mapping->a_ops->invalidatepage)
2705 page->mapping->a_ops->invalidatepage(page, offset);
2706#endif
2707 unlock_page(page);
2708 return 0;
2709 }
2710
2711
2712
2713
2714
2715
2716
2717
2718 zero_user_segment(page, offset, PAGE_CACHE_SIZE);
2719out:
2720 ret = mpage_writepage(page, get_block, wbc);
2721 if (ret == -EAGAIN)
2722 ret = __block_write_full_page(inode, page, get_block, wbc,
2723 end_buffer_async_write);
2724 return ret;
2725}
2726EXPORT_SYMBOL(nobh_writepage);
2727
2728int nobh_truncate_page(struct address_space *mapping,
2729 loff_t from, get_block_t *get_block)
2730{
2731 pgoff_t index = from >> PAGE_CACHE_SHIFT;
2732 unsigned offset = from & (PAGE_CACHE_SIZE-1);
2733 unsigned blocksize;
2734 sector_t iblock;
2735 unsigned length, pos;
2736 struct inode *inode = mapping->host;
2737 struct page *page;
2738 struct buffer_head map_bh;
2739 int err;
2740
2741 blocksize = 1 << inode->i_blkbits;
2742 length = offset & (blocksize - 1);
2743
2744
2745 if (!length)
2746 return 0;
2747
2748 length = blocksize - length;
2749 iblock = (sector_t)index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
2750
2751 page = grab_cache_page(mapping, index);
2752 err = -ENOMEM;
2753 if (!page)
2754 goto out;
2755
2756 if (page_has_buffers(page)) {
2757has_buffers:
2758 unlock_page(page);
2759 page_cache_release(page);
2760 return block_truncate_page(mapping, from, get_block);
2761 }
2762
2763
2764 pos = blocksize;
2765 while (offset >= pos) {
2766 iblock++;
2767 pos += blocksize;
2768 }
2769
2770 map_bh.b_size = blocksize;
2771 map_bh.b_state = 0;
2772 err = get_block(inode, iblock, &map_bh, 0);
2773 if (err)
2774 goto unlock;
2775
2776 if (!buffer_mapped(&map_bh))
2777 goto unlock;
2778
2779
2780 if (!PageUptodate(page)) {
2781 err = mapping->a_ops->readpage(NULL, page);
2782 if (err) {
2783 page_cache_release(page);
2784 goto out;
2785 }
2786 lock_page(page);
2787 if (!PageUptodate(page)) {
2788 err = -EIO;
2789 goto unlock;
2790 }
2791 if (page_has_buffers(page))
2792 goto has_buffers;
2793 }
2794 zero_user(page, offset, length);
2795 set_page_dirty(page);
2796 err = 0;
2797
2798unlock:
2799 unlock_page(page);
2800 page_cache_release(page);
2801out:
2802 return err;
2803}
2804EXPORT_SYMBOL(nobh_truncate_page);
2805
2806int block_truncate_page(struct address_space *mapping,
2807 loff_t from, get_block_t *get_block)
2808{
2809 pgoff_t index = from >> PAGE_CACHE_SHIFT;
2810 unsigned offset = from & (PAGE_CACHE_SIZE-1);
2811 unsigned blocksize;
2812 sector_t iblock;
2813 unsigned length, pos;
2814 struct inode *inode = mapping->host;
2815 struct page *page;
2816 struct buffer_head *bh;
2817 int err;
2818
2819 blocksize = 1 << inode->i_blkbits;
2820 length = offset & (blocksize - 1);
2821
2822
2823 if (!length)
2824 return 0;
2825
2826 length = blocksize - length;
2827 iblock = (sector_t)index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
2828
2829 page = grab_cache_page(mapping, index);
2830 err = -ENOMEM;
2831 if (!page)
2832 goto out;
2833
2834 if (!page_has_buffers(page))
2835 create_empty_buffers(page, blocksize, 0);
2836
2837
2838 bh = page_buffers(page);
2839 pos = blocksize;
2840 while (offset >= pos) {
2841 bh = bh->b_this_page;
2842 iblock++;
2843 pos += blocksize;
2844 }
2845
2846 err = 0;
2847 if (!buffer_mapped(bh)) {
2848 WARN_ON(bh->b_size != blocksize);
2849 err = get_block(inode, iblock, bh, 0);
2850 if (err)
2851 goto unlock;
2852
2853 if (!buffer_mapped(bh))
2854 goto unlock;
2855 }
2856
2857
2858 if (PageUptodate(page))
2859 set_buffer_uptodate(bh);
2860
2861 if (!buffer_uptodate(bh) && !buffer_delay(bh) && !buffer_unwritten(bh)) {
2862 err = -EIO;
2863 ll_rw_block(READ, 1, &bh);
2864 wait_on_buffer(bh);
2865
2866 if (!buffer_uptodate(bh))
2867 goto unlock;
2868 }
2869
2870 zero_user(page, offset, length);
2871 mark_buffer_dirty(bh);
2872 err = 0;
2873
2874unlock:
2875 unlock_page(page);
2876 page_cache_release(page);
2877out:
2878 return err;
2879}
2880EXPORT_SYMBOL(block_truncate_page);
2881
2882
2883
2884
int block_write_full_page(struct page *page, get_block_t *get_block,
			struct writeback_control *wbc)
{
	struct inode * const inode = page->mapping->host;
	loff_t i_size = i_size_read(inode);
	const pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT;
	unsigned offset;

	if (page->index < end_index)
		return __block_write_full_page(inode, page, get_block, wbc,
					end_buffer_async_write);

	offset = i_size & (PAGE_CACHE_SIZE-1);
	if (page->index >= end_index+1 || !offset) {
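		/*
		 * The page lies fully beyond the (new) end of file.  It may
		 * still carry dirty, unmapped buffers (added, for example,
		 * by a journalling filesystem's writepage), so invalidate
		 * it here to make those buffers freeable and avoid leaking
		 * the page.
		 */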
		do_invalidatepage(page, 0, PAGE_CACHE_SIZE);
		unlock_page(page);
		return 0;
	}

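	/*
	 * The page straddles i_size.  It must be zeroed out on each and
	 * every writepage invocation because it may be mmapped: the tail
	 * beyond EOF must read back as zeroes and must not be written out
	 * to the file.
	 */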
	zero_user_segment(page, offset, PAGE_CACHE_SIZE);
	return __block_write_full_page(inode, page, get_block, wbc,
				end_buffer_async_write);
}
EXPORT_SYMBOL(block_write_full_page);

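/*
 * Map a file-relative block number to an on-disk block number via
 * @get_block; used to implement the FIBMAP-style ->bmap() operation.
 */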
sector_t generic_block_bmap(struct address_space *mapping, sector_t block,
			    get_block_t *get_block)
{
	struct buffer_head tmp;
	struct inode *inode = mapping->host;
	tmp.b_state = 0;
	tmp.b_blocknr = 0;
	tmp.b_size = 1 << inode->i_blkbits;
	get_block(inode, block, &tmp, 0);
	return tmp.b_blocknr;
}
EXPORT_SYMBOL(generic_block_bmap);

static void end_bio_bh_io_sync(struct bio *bio, int err)
{
	struct buffer_head *bh = bio->bi_private;

	if (err == -EOPNOTSUPP) {
		set_bit(BIO_EOPNOTSUPP, &bio->bi_flags);
	}

	if (unlikely(test_bit(BIO_QUIET, &bio->bi_flags)))
		set_bit(BH_Quiet, &bh->b_state);

	bh->b_end_io(bh, test_bit(BIO_UPTODATE, &bio->bi_flags));
	bio_put(bio);
}

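/*
 * This allows us to do I/O even on the odd last sectors of a device, even
 * if the block size is some multiple of the physical sector size: the bio
 * is truncated to the size of the device, and for reads the clipped tail
 * of the buffer is cleared by hand.  Truly out-of-range accesses are left
 * alone and turn into real I/O errors.
 */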
void guard_bio_eod(int rw, struct bio *bio)
{
	sector_t maxsector;
	struct bio_vec *bvec = &bio->bi_io_vec[bio->bi_vcnt - 1];
	unsigned truncated_bytes;

	maxsector = i_size_read(bio->bi_bdev->bd_inode) >> 9;
	if (!maxsector)
		return;

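	/*
	 * If the *whole* I/O is past the end of the device, let it through
	 * and let the block layer turn it into an actual I/O error.
	 */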
	if (unlikely(bio->bi_iter.bi_sector >= maxsector))
		return;

	maxsector -= bio->bi_iter.bi_sector;
	if (likely((bio->bi_iter.bi_size >> 9) <= maxsector))
		return;

	truncated_bytes = bio->bi_iter.bi_size - (maxsector << 9);

	bio->bi_iter.bi_size -= truncated_bytes;
	bvec->bv_len -= truncated_bytes;

	if ((rw & RW_MASK) == READ) {
		zero_user(bvec->bv_page, bvec->bv_offset + bvec->bv_len,
				truncated_bytes);
	}
}

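/*
 * Build a single-segment bio for @bh and submit it.  The buffer must be
 * locked, mapped and have a completion handler installed; bio completion
 * is routed back through end_bio_bh_io_sync() above.
 */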
int _submit_bh(int rw, struct buffer_head *bh, unsigned long bio_flags)
{
	struct bio *bio;
	int ret = 0;

	BUG_ON(!buffer_locked(bh));
	BUG_ON(!buffer_mapped(bh));
	BUG_ON(!bh->b_end_io);
	BUG_ON(buffer_delay(bh));
	BUG_ON(buffer_unwritten(bh));

	if (test_set_buffer_req(bh) && (rw & WRITE))
		clear_buffer_write_io_error(bh);

	bio = bio_alloc(GFP_NOIO, 1);

	bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9);
	bio->bi_bdev = bh->b_bdev;
	bio->bi_io_vec[0].bv_page = bh->b_page;
	bio->bi_io_vec[0].bv_len = bh->b_size;
	bio->bi_io_vec[0].bv_offset = bh_offset(bh);

	bio->bi_vcnt = 1;
	bio->bi_iter.bi_size = bh->b_size;

	bio->bi_end_io = end_bio_bh_io_sync;
	bio->bi_private = bh;
	bio->bi_flags |= bio_flags;

	guard_bio_eod(rw, bio);

	if (buffer_meta(bh))
		rw |= REQ_META;
	if (buffer_prio(bh))
		rw |= REQ_PRIO;

	bio_get(bio);
	submit_bio(rw, bio);

	if (bio_flagged(bio, BIO_EOPNOTSUPP))
		ret = -EOPNOTSUPP;

	bio_put(bio);
	return ret;
}
EXPORT_SYMBOL_GPL(_submit_bh);

int submit_bh(int rw, struct buffer_head *bh)
{
	return _submit_bh(rw, bh, 0);
}
EXPORT_SYMBOL(submit_bh);

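/*
 * ll_rw_block - start I/O on an array of buffer_heads
 * @rw:  READ or WRITE
 * @nr:  number of entries in @bhs
 * @bhs: array of pointers to buffer_head
 *
 * Buffers that cannot be locked without blocking are skipped, as are clean
 * buffers on a WRITE and already up-to-date buffers on a READ.  For the
 * buffers that are submitted, b_end_io is set to the simple sync completion
 * handlers, so callers that care about the result must wait_on_buffer() and
 * then test buffer_uptodate() themselves.
 *
 * Illustrative use (sketch only), reading one buffer synchronously:
 *
 *	ll_rw_block(READ, 1, &bh);
 *	wait_on_buffer(bh);
 *	if (!buffer_uptodate(bh))
 *		return -EIO;
 */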
void ll_rw_block(int rw, int nr, struct buffer_head *bhs[])
{
	int i;

	for (i = 0; i < nr; i++) {
		struct buffer_head *bh = bhs[i];

		if (!trylock_buffer(bh))
			continue;
		if (rw == WRITE) {
			if (test_clear_buffer_dirty(bh)) {
				bh->b_end_io = end_buffer_write_sync;
				get_bh(bh);
				submit_bh(WRITE, bh);
				continue;
			}
		} else {
			if (!buffer_uptodate(bh)) {
				bh->b_end_io = end_buffer_read_sync;
				get_bh(bh);
				submit_bh(rw, bh);
				continue;
			}
		}
		unlock_buffer(bh);
	}
}
EXPORT_SYMBOL(ll_rw_block);

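/*
 * Write out @bh if it is dirty: the buffer is locked, its dirty bit is
 * cleared and the write is submitted without waiting for completion.  If
 * the buffer turns out to be clean it is simply unlocked again.  Callers
 * that need completion guarantees should use __sync_dirty_buffer() below.
 */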
void write_dirty_buffer(struct buffer_head *bh, int rw)
{
	lock_buffer(bh);
	if (!test_clear_buffer_dirty(bh)) {
		unlock_buffer(bh);
		return;
	}
	bh->b_end_io = end_buffer_write_sync;
	get_bh(bh);
	submit_bh(rw, bh);
}
EXPORT_SYMBOL(write_dirty_buffer);

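/*
 * For a data-integrity writeout, we need to wait upon any in-progress I/O
 * and then start new I/O and then wait upon it.  The caller must hold a
 * reference on the buffer_head.
 */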
int __sync_dirty_buffer(struct buffer_head *bh, int rw)
{
	int ret = 0;

	WARN_ON(atomic_read(&bh->b_count) < 1);
	lock_buffer(bh);
	if (test_clear_buffer_dirty(bh)) {
		get_bh(bh);
		bh->b_end_io = end_buffer_write_sync;
		ret = submit_bh(rw, bh);
		wait_on_buffer(bh);
		if (!ret && !buffer_uptodate(bh))
			ret = -EIO;
	} else {
		unlock_buffer(bh);
	}
	return ret;
}
EXPORT_SYMBOL(__sync_dirty_buffer);

int sync_dirty_buffer(struct buffer_head *bh)
{
	return __sync_dirty_buffer(bh, WRITE_SYNC);
}
EXPORT_SYMBOL(sync_dirty_buffer);

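/*
 * try_to_free_buffers() checks whether all buffers on this page are clean
 * and unused, and if so detaches and frees them.
 *
 * Exclusion against other users of the page's buffers is obtained either by
 * holding the page lock or the mapping's private_lock.  If the page is dirty
 * but every buffer is clean, the page's dirty bit is cancelled as well:
 * leaving a dirty, buffer-less page around would cause a later buffer
 * reattachment to mark *all* of the new buffers dirty.
 *
 * try_to_free_buffers() is non-blocking.
 */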
static inline int buffer_busy(struct buffer_head *bh)
{
	return atomic_read(&bh->b_count) |
		(bh->b_state & ((1 << BH_Dirty) | (1 << BH_Lock)));
}

static int
drop_buffers(struct page *page, struct buffer_head **buffers_to_free)
{
	struct buffer_head *head = page_buffers(page);
	struct buffer_head *bh;

	bh = head;
	do {
		if (buffer_write_io_error(bh) && page->mapping)
			set_bit(AS_EIO, &page->mapping->flags);
		if (buffer_busy(bh))
			goto failed;
		bh = bh->b_this_page;
	} while (bh != head);

	do {
		struct buffer_head *next = bh->b_this_page;

		if (bh->b_assoc_map)
			__remove_assoc_queue(bh);
		bh = next;
	} while (bh != head);
	*buffers_to_free = head;
	__clear_page_buffers(page);
	return 1;
failed:
	return 0;
}

int try_to_free_buffers(struct page *page)
{
	struct address_space * const mapping = page->mapping;
	struct buffer_head *buffers_to_free = NULL;
	int ret = 0;

	BUG_ON(!PageLocked(page));
	if (PageWriteback(page))
		return 0;

	if (mapping == NULL) {
		ret = drop_buffers(page, &buffers_to_free);
		goto out;
	}

	spin_lock(&mapping->private_lock);
	ret = drop_buffers(page, &buffers_to_free);

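	/*
	 * If the buffers were all dropped, the page may still be marked
	 * dirty even though no dirty data remains (for example when a
	 * filesystem writes its buffers by hand, or after truncate has
	 * cleaned every buffer).  Cancel the page's dirty bit here so the
	 * VM does not keep trying to write it back; private_lock is held
	 * across this to synchronise against __set_page_dirty_buffers().
	 */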
	if (ret)
		cancel_dirty_page(page, PAGE_CACHE_SIZE);
	spin_unlock(&mapping->private_lock);
out:
	if (buffers_to_free) {
		struct buffer_head *bh = buffers_to_free;

		do {
			struct buffer_head *next = bh->b_this_page;
			free_buffer_head(bh);
			bh = next;
		} while (bh != buffers_to_free);
	}
	return ret;
}
EXPORT_SYMBOL(try_to_free_buffers);

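/*
 * There are no bdflush tunables left, but distributions may still run
 * obsolete flush daemons, so we terminate them here.  Use of bdflush() is
 * deprecated; the kernel's own flusher threads fully replace it.
 */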
SYSCALL_DEFINE2(bdflush, int, func, long, data)
{
	static int msg_count;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	if (msg_count < 5) {
		msg_count++;
		printk(KERN_INFO
			"warning: process `%s' used the obsolete bdflush"
			" system call\n", current->comm);
		printk(KERN_INFO "Fix your initscripts?\n");
	}

	if (func == 1)
		do_exit(0);
	return 0;
}

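/*
 * Buffer-head allocation
 */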
static struct kmem_cache *bh_cachep __read_mostly;

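/*
 * Once the number of bh's in the machine exceeds this level, we start
 * stripping them in writeback.
 */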
static unsigned long max_buffer_heads;

int buffer_heads_over_limit;

struct bh_accounting {
	int nr;
	int ratelimit;
};

static DEFINE_PER_CPU(struct bh_accounting, bh_accounting) = {0, 0};

static void recalc_bh_state(void)
{
	int i;
	int tot = 0;

	if (__this_cpu_inc_return(bh_accounting.ratelimit) - 1 < 4096)
		return;
	__this_cpu_write(bh_accounting.ratelimit, 0);
	for_each_online_cpu(i)
		tot += per_cpu(bh_accounting, i).nr;
	buffer_heads_over_limit = (tot > max_buffer_heads);
}

struct buffer_head *alloc_buffer_head(gfp_t gfp_flags)
{
	struct buffer_head *ret = kmem_cache_zalloc(bh_cachep, gfp_flags);
	if (ret) {
		INIT_LIST_HEAD(&ret->b_assoc_buffers);
		preempt_disable();
		__this_cpu_inc(bh_accounting.nr);
		recalc_bh_state();
		preempt_enable();
	}
	return ret;
}
EXPORT_SYMBOL(alloc_buffer_head);

void free_buffer_head(struct buffer_head *bh)
{
	BUG_ON(!list_empty(&bh->b_assoc_buffers));
	kmem_cache_free(bh_cachep, bh);
	preempt_disable();
	__this_cpu_dec(bh_accounting.nr);
	recalc_bh_state();
	preempt_enable();
}
EXPORT_SYMBOL(free_buffer_head);

static void buffer_exit_cpu(int cpu)
{
	int i;
	struct bh_lru *b = &per_cpu(bh_lrus, cpu);

	for (i = 0; i < BH_LRU_SIZE; i++) {
		brelse(b->bhs[i]);
		b->bhs[i] = NULL;
	}
	this_cpu_add(bh_accounting.nr, per_cpu(bh_accounting, cpu).nr);
	per_cpu(bh_accounting, cpu).nr = 0;
}

static int buffer_cpu_notify(struct notifier_block *self,
			     unsigned long action, void *hcpu)
{
	if (action == CPU_DEAD || action == CPU_DEAD_FROZEN)
		buffer_exit_cpu((unsigned long)hcpu);
	return NOTIFY_OK;
}

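/**
 * bh_uptodate_or_lock - Test whether the buffer is uptodate
 * @bh: struct buffer_head
 *
 * Return true if the buffer is up-to-date and false,
 * with the buffer locked, if not.
 */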
int bh_uptodate_or_lock(struct buffer_head *bh)
{
	if (!buffer_uptodate(bh)) {
		lock_buffer(bh);
		if (!buffer_uptodate(bh))
			return 0;
		unlock_buffer(bh);
	}
	return 1;
}
EXPORT_SYMBOL(bh_uptodate_or_lock);

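/**
 * bh_submit_read - Submit a locked buffer for reading
 * @bh: struct buffer_head
 *
 * Returns zero on success and -EIO on error.
 *
 * Typical pairing with bh_uptodate_or_lock() (illustrative sketch only):
 *
 *	if (!bh_uptodate_or_lock(bh)) {
 *		if (bh_submit_read(bh))
 *			return -EIO;
 *	}
 */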
int bh_submit_read(struct buffer_head *bh)
{
	BUG_ON(!buffer_locked(bh));

	if (buffer_uptodate(bh)) {
		unlock_buffer(bh);
		return 0;
	}

	get_bh(bh);
	bh->b_end_io = end_buffer_read_sync;
	submit_bh(READ, bh);
	wait_on_buffer(bh);
	if (buffer_uptodate(bh))
		return 0;
	return -EIO;
}
EXPORT_SYMBOL(bh_submit_read);

void __init buffer_init(void)
{
	unsigned long nrpages;

	bh_cachep = kmem_cache_create("buffer_head",
			sizeof(struct buffer_head), 0,
				(SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|
				SLAB_MEM_SPREAD),
				NULL);

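	/*
	 * Limit buffer_head occupancy to roughly 10% of low memory.
	 */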
	nrpages = (nr_free_buffer_pages() * 10) / 100;
	max_buffer_heads = nrpages * (PAGE_SIZE / sizeof(struct buffer_head));
	hotcpu_notifier(buffer_cpu_notify, 0);
}