/*
 *  linux/fs/buffer.c
 *
 *  Copyright (C) 1991, 1992, 2002  Linus Torvalds
 */
21#include <linux/kernel.h>
22#include <linux/syscalls.h>
23#include <linux/fs.h>
24#include <linux/iomap.h>
25#include <linux/mm.h>
26#include <linux/percpu.h>
27#include <linux/slab.h>
28#include <linux/capability.h>
29#include <linux/blkdev.h>
30#include <linux/file.h>
31#include <linux/quotaops.h>
32#include <linux/highmem.h>
33#include <linux/export.h>
34#include <linux/writeback.h>
35#include <linux/hash.h>
36#include <linux/suspend.h>
37#include <linux/buffer_head.h>
38#include <linux/task_io_accounting_ops.h>
39#include <linux/bio.h>
40#include <linux/notifier.h>
41#include <linux/cpu.h>
42#include <linux/bitops.h>
43#include <linux/mpage.h>
44#include <linux/bit_spinlock.h>
45#include <linux/pagevec.h>
46#include <trace/events/block.h>
47
48static int fsync_buffers_list(spinlock_t *lock, struct list_head *list);
49
50#define BH_ENTRY(list) list_entry((list), struct buffer_head, b_assoc_buffers)
51
52void init_buffer(struct buffer_head *bh, bh_end_io_t *handler, void *private)
53{
54 bh->b_end_io = handler;
55 bh->b_private = private;
56}
57EXPORT_SYMBOL(init_buffer);
58
59inline void touch_buffer(struct buffer_head *bh)
60{
61 trace_block_touch_buffer(bh);
62 mark_page_accessed(bh->b_page);
63}
64EXPORT_SYMBOL(touch_buffer);
65
66void __lock_buffer(struct buffer_head *bh)
67{
68 wait_on_bit_lock_io(&bh->b_state, BH_Lock, TASK_UNINTERRUPTIBLE);
69}
70EXPORT_SYMBOL(__lock_buffer);
71
72void unlock_buffer(struct buffer_head *bh)
73{
74 clear_bit_unlock(BH_Lock, &bh->b_state);
75 smp_mb__after_clear_bit();
76 wake_up_bit(&bh->b_state, BH_Lock);
77}
78EXPORT_SYMBOL(unlock_buffer);
79
/*
 * Returns if the page has dirty or writeback buffers. If all the buffers
 * are unlocked and clean then the PageDirty information is stale. If
 * any of the buffers are locked, it is assumed they are locked for IO.
 */
85void buffer_check_dirty_writeback(struct page *page,
86 bool *dirty, bool *writeback)
87{
88 struct buffer_head *head, *bh;
89 *dirty = false;
90 *writeback = false;
91
92 BUG_ON(!PageLocked(page));
93
94 if (!page_has_buffers(page))
95 return;
96
97 if (PageWriteback(page))
98 *writeback = true;
99
100 head = page_buffers(page);
101 bh = head;
102 do {
103 if (buffer_locked(bh))
104 *writeback = true;
105
106 if (buffer_dirty(bh))
107 *dirty = true;
108
109 bh = bh->b_this_page;
110 } while (bh != head);
111}
112EXPORT_SYMBOL(buffer_check_dirty_writeback);
113
/*
 * Block until a buffer comes unlocked.  This doesn't stop it
 * from becoming locked again - you have to lock it yourself
 * if you want to preserve its state.
 */
119void __wait_on_buffer(struct buffer_head * bh)
120{
121 wait_on_bit_io(&bh->b_state, BH_Lock, TASK_UNINTERRUPTIBLE);
122}
123EXPORT_SYMBOL(__wait_on_buffer);
124
125static void
126__clear_page_buffers(struct page *page)
127{
128 ClearPagePrivate(page);
129 set_page_private(page, 0);
130 page_cache_release(page);
131}
132
133static void buffer_io_error(struct buffer_head *bh, char *msg)
134{
135 char b[BDEVNAME_SIZE];
136
137 if (!test_bit(BH_Quiet, &bh->b_state))
138 printk_ratelimited(KERN_ERR
139 "Buffer I/O error on dev %s, logical block %llu%s\n",
140 bdevname(bh->b_bdev, b),
141 (unsigned long long)bh->b_blocknr, msg);
142}
143
/*
 * End-of-IO handler helper function which does not touch the bh after
 * unlocking it.
 * Note: unlock_buffer() sort-of does touch the bh after unlocking it, but
 * a race there is benign: unlock_buffer() only uses the bh's address for
 * the wakeup after unlocking the buffer, so it doesn't actually touch the
 * bh itself.
 */
152static void __end_buffer_read_notouch(struct buffer_head *bh, int uptodate)
153{
154 if (uptodate) {
155 set_buffer_uptodate(bh);
156 } else {
157
158 clear_buffer_uptodate(bh);
159 }
160 unlock_buffer(bh);
161}
162
/*
 * Default synchronous end-of-IO handler.  Just mark the buffer up-to-date
 * (or not) and unlock it.
 */
167void end_buffer_read_sync(struct buffer_head *bh, int uptodate)
168{
169 __end_buffer_read_notouch(bh, uptodate);
170 put_bh(bh);
171}
172EXPORT_SYMBOL(end_buffer_read_sync);
173
174void end_buffer_write_sync(struct buffer_head *bh, int uptodate)
175{
176 if (uptodate) {
177 set_buffer_uptodate(bh);
178 } else {
179 buffer_io_error(bh, ", lost sync page write");
180 set_buffer_write_io_error(bh);
181 clear_buffer_uptodate(bh);
182 }
183 unlock_buffer(bh);
184 put_bh(bh);
185}
186EXPORT_SYMBOL(end_buffer_write_sync);
187
/*
 * Various filesystems appear to want __find_get_block to be non-blocking.
 * But it's the page lock which protects the buffers.  To get around this,
 * we get exclusion from try_to_free_buffers with the blockdev mapping's
 * private_lock.
 *
 * Hack idea: for the blockdev mapping, private_lock contention
 * may be quite high.  This code could TryLock the page, and if that
 * succeeds, there is no need to take private_lock.
 */
199static struct buffer_head *
200__find_get_block_slow(struct block_device *bdev, sector_t block)
201{
202 struct inode *bd_inode = bdev->bd_inode;
203 struct address_space *bd_mapping = bd_inode->i_mapping;
204 struct buffer_head *ret = NULL;
205 pgoff_t index;
206 struct buffer_head *bh;
207 struct buffer_head *head;
208 struct page *page;
209 int all_mapped = 1;
210
211 index = block >> (PAGE_CACHE_SHIFT - bd_inode->i_blkbits);
212 page = find_get_page(bd_mapping, index);
213 if (!page)
214 goto out;
215
216 spin_lock(&bd_mapping->private_lock);
217 if (!page_has_buffers(page))
218 goto out_unlock;
219 head = page_buffers(page);
220 bh = head;
221 do {
222 if (!buffer_mapped(bh))
223 all_mapped = 0;
224 else if (bh->b_blocknr == block) {
225 ret = bh;
226 get_bh(bh);
227 goto out_unlock;
228 }
229 bh = bh->b_this_page;
230 } while (bh != head);
231
	/*
	 * We might be here because some of the buffers on this page are
	 * not mapped.  This is due to various races between file I/O on
	 * the block device and getblk().  It gets dealt with elsewhere;
	 * only complain if every buffer was mapped and we still failed.
	 */
237 if (all_mapped) {
238 char b[BDEVNAME_SIZE];
239
240 printk("__find_get_block_slow() failed. "
241 "block=%llu, b_blocknr=%llu\n",
242 (unsigned long long)block,
243 (unsigned long long)bh->b_blocknr);
244 printk("b_state=0x%08lx, b_size=%zu\n",
245 bh->b_state, bh->b_size);
246 printk("device %s blocksize: %d\n", bdevname(bdev, b),
247 1 << bd_inode->i_blkbits);
248 }
249out_unlock:
250 spin_unlock(&bd_mapping->private_lock);
251 page_cache_release(page);
252out:
253 return ret;
254}
255
/*
 * Kick the writeback threads, then try to free up some memory on each
 * node so that a subsequent buffer-head allocation can make progress.
 */
259static void free_more_memory(void)
260{
261 struct zone *zone;
262 int nid;
263
264 wakeup_flusher_threads(1024, WB_REASON_FREE_MORE_MEM);
265 yield();
266
267 for_each_online_node(nid) {
268 (void)first_zones_zonelist(node_zonelist(nid, GFP_NOFS),
269 gfp_zone(GFP_NOFS), NULL,
270 &zone);
271 if (zone)
272 try_to_free_pages(node_zonelist(nid, GFP_NOFS), 0,
273 GFP_NOFS, NULL);
274 }
275}
276
/*
 * I/O completion handler for block_read_full_page() - pages
 * which come unlocked at the end of I/O.
 */
281static void end_buffer_async_read(struct buffer_head *bh, int uptodate)
282{
283 unsigned long flags;
284 struct buffer_head *first;
285 struct buffer_head *tmp;
286 struct page *page;
287 int page_uptodate = 1;
288
289 BUG_ON(!buffer_async_read(bh));
290
291 page = bh->b_page;
292 if (uptodate) {
293 set_buffer_uptodate(bh);
294 } else {
295 clear_buffer_uptodate(bh);
296 buffer_io_error(bh, ", async page read");
297 SetPageError(page);
298 }
299
	/*
	 * Be _very_ careful from here on. Bad things can happen if
	 * two buffer heads end IO at almost the same time and both
	 * decide that the page is now completely done.
	 */
305 first = page_buffers(page);
306 local_irq_save(flags);
307 bit_spin_lock(BH_Uptodate_Lock, &first->b_state);
308 clear_buffer_async_read(bh);
309 unlock_buffer(bh);
310 tmp = bh;
311 do {
312 if (!buffer_uptodate(tmp))
313 page_uptodate = 0;
314 if (buffer_async_read(tmp)) {
315 BUG_ON(!buffer_locked(tmp));
316 goto still_busy;
317 }
318 tmp = tmp->b_this_page;
319 } while (tmp != bh);
320 bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
321 local_irq_restore(flags);
322
	/*
	 * If none of the buffers had errors and they are all
	 * uptodate then we can set the page uptodate.
	 */
327 if (page_uptodate && !PageError(page))
328 SetPageUptodate(page);
329 unlock_page(page);
330 return;
331
332still_busy:
333 bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
334 local_irq_restore(flags);
335 return;
336}
337
/*
 * Completion handler for block_write_full_page() - pages which are unlocked
 * during I/O, and which have PageWriteback cleared upon I/O completion.
 */
342void end_buffer_async_write(struct buffer_head *bh, int uptodate)
343{
344 unsigned long flags;
345 struct buffer_head *first;
346 struct buffer_head *tmp;
347 struct page *page;
348
349 BUG_ON(!buffer_async_write(bh));
350
351 page = bh->b_page;
352 if (uptodate) {
353 set_buffer_uptodate(bh);
354 } else {
355 buffer_io_error(bh, ", lost async page write");
356 set_bit(AS_EIO, &page->mapping->flags);
357 set_buffer_write_io_error(bh);
358 clear_buffer_uptodate(bh);
359 SetPageError(page);
360 }
361
362 first = page_buffers(page);
363 local_irq_save(flags);
364 bit_spin_lock(BH_Uptodate_Lock, &first->b_state);
365
366 clear_buffer_async_write(bh);
367 unlock_buffer(bh);
368 tmp = bh->b_this_page;
369 while (tmp != bh) {
370 if (buffer_async_write(tmp)) {
371 BUG_ON(!buffer_locked(tmp));
372 goto still_busy;
373 }
374 tmp = tmp->b_this_page;
375 }
376 bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
377 local_irq_restore(flags);
378 end_page_writeback(page);
379 return;
380
381still_busy:
382 bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
383 local_irq_restore(flags);
384 return;
385}
386EXPORT_SYMBOL(end_buffer_async_write);
387
/*
 * If a page's buffers are under async readin (end_buffer_async_read
 * completion) then there is a possibility that another thread of control
 * could lock one of the buffers after it has completed but while some of
 * the other buffers have not completed.  This locked buffer would confuse
 * end_buffer_async_read() into not unlocking the page.  So the absence
 * of BH_Async_Read tells end_buffer_async_read() that this buffer is
 * not under async I/O.
 *
 * The page comes unlocked when it has no locked buffer_async buffers
 * left.
 *
 * PageLocked prevents anyone starting new async I/O reads any of
 * the buffers.
 *
 * PageWriteback is used to prevent simultaneous writeout of the same
 * page.
 *
 * PageLocked prevents anyone from starting writeback of a page which is
 * under read I/O (PageWriteback is only ever set against a locked page).
 */
409static void mark_buffer_async_read(struct buffer_head *bh)
410{
411 bh->b_end_io = end_buffer_async_read;
412 set_buffer_async_read(bh);
413}
414
415static void mark_buffer_async_write_endio(struct buffer_head *bh,
416 bh_end_io_t *handler)
417{
418 bh->b_end_io = handler;
419 set_buffer_async_write(bh);
420}
421
422void mark_buffer_async_write(struct buffer_head *bh)
423{
424 mark_buffer_async_write_endio(bh, end_buffer_async_write);
425}
426EXPORT_SYMBOL(mark_buffer_async_write);
427
/*
 * fs/buffer.c contains helper functions for buffer-backed address space's
 * fsync functions.  A common requirement for buffer-based filesystems is
 * that certain data from the backing blockdev needs to be written out for
 * a successful fsync().  For example, ext2 indirect blocks need to be
 * written back and waited upon before fsync() returns.
 *
 * The functions mark_buffer_dirty_inode(), fsync_buffers_list(),
 * inode_has_buffers() and invalidate_inode_buffers() are provided for the
 * management of a list of dependent buffers at ->i_mapping->private_list.
 *
 * Locking is a little subtle: try_to_free_buffers() will remove buffers
 * from their controlling inode's queue when they are being freed.  But
 * try_to_free_buffers() will be operating against the *blockdev* mapping
 * at the time, not against the S_ISREG file which depends on those buffers.
 * So the locking for private_list is via the private_lock in the address_space
 * which backs the buffers.  Each buffer records, in b_assoc_map, the
 * address_space which depends on it, so that write errors can be propagated
 * to the right mapping and the buffer can later be unhooked from the right
 * list again.
 */
481static void __remove_assoc_queue(struct buffer_head *bh)
482{
483 list_del_init(&bh->b_assoc_buffers);
484 WARN_ON(!bh->b_assoc_map);
485 if (buffer_write_io_error(bh))
486 set_bit(AS_EIO, &bh->b_assoc_map->flags);
487 bh->b_assoc_map = NULL;
488}
489
490int inode_has_buffers(struct inode *inode)
491{
492 return !list_empty(&inode->i_data.private_list);
493}
494
/*
 * osync is designed to support O_SYNC io.  It waits synchronously for
 * all already-submitted IO to complete, but does not queue any new
 * writes to the disk.
 *
 * To do O_SYNC writes, just wait after submitting the buffer writes:
 * any buffer on the list which is still locked is waited upon here, and
 * -EIO is returned if one of them failed to become uptodate.  The walk
 * restarts from the tail whenever the lock has been dropped.
 */
505static int osync_buffers_list(spinlock_t *lock, struct list_head *list)
506{
507 struct buffer_head *bh;
508 struct list_head *p;
509 int err = 0;
510
511 spin_lock(lock);
512repeat:
513 list_for_each_prev(p, list) {
514 bh = BH_ENTRY(p);
515 if (buffer_locked(bh)) {
516 get_bh(bh);
517 spin_unlock(lock);
518 wait_on_buffer(bh);
519 if (!buffer_uptodate(bh))
520 err = -EIO;
521 brelse(bh);
522 spin_lock(lock);
523 goto repeat;
524 }
525 }
526 spin_unlock(lock);
527 return err;
528}
529
530void emergency_thaw_bdev(struct super_block *sb)
531{
532 char b[BDEVNAME_SIZE];
533 while (sb->s_bdev && !thaw_bdev(sb->s_bdev, sb))
534 printk(KERN_WARNING "Emergency Thaw on %s\n",
535 bdevname(sb->s_bdev, b));
536}
537
/**
 * sync_mapping_buffers - write out & wait upon a mapping's "associated" buffers
 * @mapping: the mapping which wants those buffers written
 *
 * Starts I/O against the buffers at mapping->private_list, and waits upon
 * them.
 *
 * Basically, this is a convenience function for fsync().
 * @mapping is a file or directory which needs those buffers to be written for
 * a successful fsync().
 */
549int sync_mapping_buffers(struct address_space *mapping)
550{
551 struct address_space *buffer_mapping = mapping->private_data;
552
553 if (buffer_mapping == NULL || list_empty(&mapping->private_list))
554 return 0;
555
556 return fsync_buffers_list(&buffer_mapping->private_lock,
557 &mapping->private_list);
558}
559EXPORT_SYMBOL(sync_mapping_buffers);
560
/*
 * Called when we've recently written block `bblock', and it is known that
 * `bblock' was for a buffer_boundary() buffer.  This means that the block at
 * `bblock + 1' is probably a dirty indirect block.  Hunt it down and, if it's
 * dirty, schedule it for IO.  So that indirects merge nicely with their data.
 */
567void write_boundary_block(struct block_device *bdev,
568 sector_t bblock, unsigned blocksize)
569{
570 struct buffer_head *bh = __find_get_block(bdev, bblock + 1, blocksize);
571 if (bh) {
572 if (buffer_dirty(bh))
573 ll_rw_block(WRITE, 1, &bh);
574 put_bh(bh);
575 }
576}
577
578void mark_buffer_dirty_inode(struct buffer_head *bh, struct inode *inode)
579{
580 struct address_space *mapping = inode->i_mapping;
581 struct address_space *buffer_mapping = bh->b_page->mapping;
582
583 mark_buffer_dirty(bh);
584 if (!mapping->private_data) {
585 mapping->private_data = buffer_mapping;
586 } else {
587 BUG_ON(mapping->private_data != buffer_mapping);
588 }
589 if (!bh->b_assoc_map) {
590 spin_lock(&buffer_mapping->private_lock);
591 list_move_tail(&bh->b_assoc_buffers,
592 &mapping->private_list);
593 bh->b_assoc_map = mapping;
594 spin_unlock(&buffer_mapping->private_lock);
595 }
596}
597EXPORT_SYMBOL(mark_buffer_dirty_inode);
598
/*
 * Mark the page dirty, tag it dirty in its mapping's radix tree and mark
 * the inode dirty.
 *
 * If warn is true, emit a warning if the page is not uptodate and has
 * not been truncated.
 */
606static void __set_page_dirty(struct page *page,
607 struct address_space *mapping, int warn)
608{
609 unsigned long flags;
610
611 spin_lock_irqsave(&mapping->tree_lock, flags);
612 if (page->mapping) {
613 WARN_ON_ONCE(warn && !PageUptodate(page));
614 account_page_dirtied(page, mapping);
615 radix_tree_tag_set(&mapping->page_tree,
616 page_index(page), PAGECACHE_TAG_DIRTY);
617 }
618 spin_unlock_irqrestore(&mapping->tree_lock, flags);
619 __mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
620}
621
/*
 * Add a page to the dirty page list.
 *
 * It is a sad fact of life that this function is called from several places
 * deeply under spinlocking.  It may not sleep.
 *
 * If the page has buffers, the buffers are set dirty, to preserve
 * dirty-state coherency between the page and the buffers.  If the page does
 * not have buffers then when they come in later, they will be added to the
 * page's buffer list.
 *
 * The buffers are dirtied before the page is dirtied.  There's a small race
 * window in which a writepage caller may see the page cleanness but not the
 * buffer dirtiness.  That's fine.  If this code were to set the page dirty
 * before the buffers, a concurrent writepage caller could clear the page
 * dirty bit, see a bunch of clean buffers and we'd end up with dirty
 * buffers/clean page on the dirty page list.
 *
 * We use private_lock to lock against try_to_free_buffers while using the
 * page's buffer list.  Also use this to protect against clean buffers being
 * added to the page after it was set dirty.
 */
647int __set_page_dirty_buffers(struct page *page)
648{
649 int newly_dirty;
650 struct address_space *mapping = page_mapping(page);
651
652 if (unlikely(!mapping))
653 return !TestSetPageDirty(page);
654
655 spin_lock(&mapping->private_lock);
656 if (page_has_buffers(page)) {
657 struct buffer_head *head = page_buffers(page);
658 struct buffer_head *bh = head;
659
660 do {
661 set_buffer_dirty(bh);
662 bh = bh->b_this_page;
663 } while (bh != head);
664 }
665 newly_dirty = !TestSetPageDirty(page);
666 spin_unlock(&mapping->private_lock);
667
668 if (newly_dirty)
669 __set_page_dirty(page, mapping, 1);
670 return newly_dirty;
671}
672EXPORT_SYMBOL(__set_page_dirty_buffers);
673
/*
 * Write out and wait upon a list of buffers.
 *
 * We have conflicting pressures: we want to make sure that all
 * initially dirty buffers get waited on, but that any subsequently
 * dirtied buffers don't.  After all, we don't want fsync to last
 * forever if somebody is actively writing to the file.
 *
 * Do this in two main stages: first we copy dirty buffers to a
 * temporary inode list, queueing the writes as we go.  Then we clean
 * up, waiting for those writes to complete.
 *
 * During this second stage, any subsequent updates to the file may end
 * up refiling the buffer on the original inode's dirty list again, so
 * there is a chance we will end up with a buffer queued for write but
 * not yet completed on that list.  So, as a final cleanup we go through
 * the osync code to catch these locked, dirty buffers without requeuing
 * any newly dirty buffers for write.
 */
693static int fsync_buffers_list(spinlock_t *lock, struct list_head *list)
694{
695 struct buffer_head *bh;
696 struct list_head tmp;
697 struct address_space *mapping;
698 int err = 0, err2;
699 struct blk_plug plug;
700
701 INIT_LIST_HEAD(&tmp);
702 blk_start_plug(&plug);
703
704 spin_lock(lock);
705 while (!list_empty(list)) {
706 bh = BH_ENTRY(list->next);
707 mapping = bh->b_assoc_map;
708 __remove_assoc_queue(bh);
		/*
		 * Avoid a race with mark_buffer_dirty_inode(), which does a
		 * lockless check: we rely on seeing the dirty bit set here.
		 */
711 smp_mb();
712 if (buffer_dirty(bh) || buffer_locked(bh)) {
713 list_add(&bh->b_assoc_buffers, &tmp);
714 bh->b_assoc_map = mapping;
715 if (buffer_dirty(bh)) {
716 get_bh(bh);
717 spin_unlock(lock);
				/*
				 * Queue the write with WRITE_SYNC under the
				 * plug started above; the buffer stays on the
				 * temporary list and is waited upon in the
				 * second pass below.
				 */
725 write_dirty_buffer(bh, WRITE_SYNC);
726
727
728
729
730
731
732
733 brelse(bh);
734 spin_lock(lock);
735 }
736 }
737 }
738
739 spin_unlock(lock);
740 blk_finish_plug(&plug);
741 spin_lock(lock);
742
743 while (!list_empty(&tmp)) {
744 bh = BH_ENTRY(tmp.prev);
745 get_bh(bh);
746 mapping = bh->b_assoc_map;
747 __remove_assoc_queue(bh);
		/*
		 * Avoid a race with mark_buffer_dirty_inode(), which does a
		 * lockless check: we rely on seeing the dirty bit set here.
		 */
750 smp_mb();
751 if (buffer_dirty(bh)) {
752 list_add(&bh->b_assoc_buffers,
753 &mapping->private_list);
754 bh->b_assoc_map = mapping;
755 }
756 spin_unlock(lock);
757 wait_on_buffer(bh);
758 if (!buffer_uptodate(bh))
759 err = -EIO;
760 brelse(bh);
761 spin_lock(lock);
762 }
763
764 spin_unlock(lock);
765 err2 = osync_buffers_list(lock, list);
766 if (err)
767 return err;
768 else
769 return err2;
770}
771
/*
 * Invalidate any and all dirty buffers on a given inode.  We are
 * probably unmounting the fs, but that doesn't mean we have already
 * done a sync().  Just drop the buffers from the inode list.
 *
 * NOTE: we take the inode's blockdev's mapping's private_lock, which
 * assumes that all the buffers are against the blockdev.
 */
781void invalidate_inode_buffers(struct inode *inode)
782{
783 if (inode_has_buffers(inode)) {
784 struct address_space *mapping = &inode->i_data;
785 struct list_head *list = &mapping->private_list;
786 struct address_space *buffer_mapping = mapping->private_data;
787
788 spin_lock(&buffer_mapping->private_lock);
789 while (!list_empty(list))
790 __remove_assoc_queue(BH_ENTRY(list->next));
791 spin_unlock(&buffer_mapping->private_lock);
792 }
793}
794EXPORT_SYMBOL(invalidate_inode_buffers);
795
/*
 * Remove any clean buffers from the inode's buffer list.  This is called
 * when we're trying to free the inode itself.  Those buffers can pin it.
 *
 * Returns true if all buffers were removed.
 */
802int remove_inode_buffers(struct inode *inode)
803{
804 int ret = 1;
805
806 if (inode_has_buffers(inode)) {
807 struct address_space *mapping = &inode->i_data;
808 struct list_head *list = &mapping->private_list;
809 struct address_space *buffer_mapping = mapping->private_data;
810
811 spin_lock(&buffer_mapping->private_lock);
812 while (!list_empty(list)) {
813 struct buffer_head *bh = BH_ENTRY(list->next);
814 if (buffer_dirty(bh)) {
815 ret = 0;
816 break;
817 }
818 __remove_assoc_queue(bh);
819 }
820 spin_unlock(&buffer_mapping->private_lock);
821 }
822 return ret;
823}
824
/*
 * Create the appropriate buffers when given a page for data area and
 * the size of each buffer.  Use the bh->b_this_page linked list to
 * follow the buffers created.  Return NULL if unable to create more
 * buffers.
 *
 * The retry flag is used to differentiate async IO (paging, swapping)
 * which may not fail from ordinary buffer allocations.
 */
834struct buffer_head *alloc_page_buffers(struct page *page, unsigned long size,
835 int retry)
836{
837 struct buffer_head *bh, *head;
838 long offset;
839
840try_again:
841 head = NULL;
842 offset = PAGE_SIZE;
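	/*
	 * Carve the page into blocks of 'size' bytes, walking backwards
	 * from the end of the page and allocating one buffer_head per block.
	 */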
843 while ((offset -= size) >= 0) {
844 bh = alloc_buffer_head(GFP_NOFS);
845 if (!bh)
846 goto no_grow;
847
848 bh->b_this_page = head;
849 bh->b_blocknr = -1;
850 head = bh;
851
852 bh->b_size = size;

		/* Link the buffer to its page */
855 set_bh_page(bh, page, offset);
856 }
857 return head;
858
859
860
861no_grow:
862 if (head) {
863 do {
864 bh = head;
865 head = head->b_this_page;
866 free_buffer_head(bh);
867 } while (head);
868 }

	/*
	 * Return failure for non-async IO requests.  Async IO requests
	 * are not allowed to fail, so we have to wait until buffer heads
	 * become available.  But we don't want tasks sleeping with
	 * partially complete buffers, so all were released above.
	 */
876 if (!retry)
877 return NULL;

	/*
	 * We were unable to allocate buffer heads.  Kick writeback and
	 * reclaim via free_more_memory() and try again; callers which
	 * pass retry != 0 are not allowed to fail.
	 */
885 free_more_memory();
886 goto try_again;
887}
888EXPORT_SYMBOL_GPL(alloc_page_buffers);
889
890static inline void
891link_dev_buffers(struct page *page, struct buffer_head *head)
892{
893 struct buffer_head *bh, *tail;
894
895 bh = head;
896 do {
897 tail = bh;
898 bh = bh->b_this_page;
899 } while (bh);
900 tail->b_this_page = head;
901 attach_page_buffers(page, head);
902}
903
904static sector_t blkdev_max_block(struct block_device *bdev, unsigned int size)
905{
906 sector_t retval = ~((sector_t)0);
907 loff_t sz = i_size_read(bdev->bd_inode);
908
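	/*
	 * If i_size is zero we cannot tell how big the device is; otherwise
	 * i_size shifted down by the block-size bits is the number of whole
	 * blocks on the device, i.e. the first block number that must not
	 * be mapped.
	 */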
909 if (sz) {
910 unsigned int sizebits = blksize_bits(size);
911 retval = (sz >> sizebits);
912 }
913 return retval;
914}
915
/*
 * Initialise the state of a blockdev page's buffers.
 */
919static sector_t
920init_page_buffers(struct page *page, struct block_device *bdev,
921 sector_t block, int size)
922{
923 struct buffer_head *head = page_buffers(page);
924 struct buffer_head *bh = head;
925 int uptodate = PageUptodate(page);
926 sector_t end_block = blkdev_max_block(I_BDEV(bdev->bd_inode), size);
927
928 do {
929 if (!buffer_mapped(bh)) {
930 init_buffer(bh, NULL, NULL);
931 bh->b_bdev = bdev;
932 bh->b_blocknr = block;
933 if (uptodate)
934 set_buffer_uptodate(bh);
935 if (block < end_block)
936 set_buffer_mapped(bh);
937 }
938 block++;
939 bh = bh->b_this_page;
940 } while (bh != head);
941
942
943
944
945 return end_block;
946}
947
/*
 * Create the page-cache page that contains the requested block.
 *
 * This is used purely for blockdev mappings.
 */
953static int
954grow_dev_page(struct block_device *bdev, sector_t block,
955 pgoff_t index, int size, int sizebits)
956{
957 struct inode *inode = bdev->bd_inode;
958 struct page *page;
959 struct buffer_head *bh;
960 sector_t end_block;
961 int ret = 0;
962 gfp_t gfp_mask;
963
964 gfp_mask = mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS;
965 gfp_mask |= __GFP_MOVABLE;

	/*
	 * XXX: __getblk_slow() can not really deal with failure and
	 * will endlessly loop on improvised global reclaim.  Prefer
	 * looping in the allocator rather than here, at least that
	 * code knows what it's doing.
	 */
972 gfp_mask |= __GFP_NOFAIL;
973
974 page = find_or_create_page(inode->i_mapping, index, gfp_mask);
975 if (!page)
976 return ret;
977
978 BUG_ON(!PageLocked(page));
979
980 if (page_has_buffers(page)) {
981 bh = page_buffers(page);
982 if (bh->b_size == size) {
983 end_block = init_page_buffers(page, bdev,
984 index << sizebits, size);
985 goto done;
986 }
987 if (!try_to_free_buffers(page))
988 goto failed;
989 }
990
	/*
	 * Allocate some buffers for this page
	 */
994 bh = alloc_page_buffers(page, size, 0);
995 if (!bh)
996 goto failed;
997
	/*
	 * Link the page to the buffers and initialise them.  Take the
	 * lock to be atomic wrt __find_get_block(), which does not
	 * run under the page lock.
	 */
1003 spin_lock(&inode->i_mapping->private_lock);
1004 link_dev_buffers(page, bh);
1005 end_block = init_page_buffers(page, bdev, index << sizebits, size);
1006 spin_unlock(&inode->i_mapping->private_lock);
1007done:
1008 ret = (block < end_block) ? 1 : -ENXIO;
1009failed:
1010 unlock_page(page);
1011 page_cache_release(page);
1012 return ret;
1013}
1014
/*
 * Create buffers for the specified block device block's page.  If
 * that page was dirty, the buffers are set dirty also.
 */
1019static int
1020grow_buffers(struct block_device *bdev, sector_t block, int size)
1021{
1022 pgoff_t index;
1023 int sizebits;
1024
1025 sizebits = -1;
1026 do {
1027 sizebits++;
1028 } while ((size << sizebits) < PAGE_SIZE);
1029
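	/*
	 * sizebits is now log2(PAGE_SIZE / size), so the page index that
	 * holds 'block' is simply block >> sizebits.
	 */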
1030 index = block >> sizebits;

	/*
	 * Check for a block which wants to lie outside our maximum possible
	 * pagecache index.  (this comparison is done using sector_t types).
	 */
1036 if (unlikely(index != block >> sizebits)) {
1037 char b[BDEVNAME_SIZE];
1038
1039 printk(KERN_ERR "%s: requested out-of-range block %llu for "
1040 "device %s\n",
1041 __func__, (unsigned long long)block,
1042 bdevname(bdev, b));
1043 return -EIO;
1044 }
1045
1046
1047 return grow_dev_page(bdev, block, index, size, sizebits);
1048}
1049
1050static struct buffer_head *
1051__getblk_slow(struct block_device *bdev, sector_t block, int size)
1052{
1053
1054 if (unlikely(size & (bdev_logical_block_size(bdev)-1) ||
1055 (size < 512 || size > PAGE_SIZE))) {
1056 printk(KERN_ERR "getblk(): invalid block size %d requested\n",
1057 size);
1058 printk(KERN_ERR "logical block size: %d\n",
1059 bdev_logical_block_size(bdev));
1060
1061 dump_stack();
1062 return NULL;
1063 }
1064
1065 for (;;) {
1066 struct buffer_head *bh;
1067 int ret;
1068
1069 bh = __find_get_block(bdev, block, size);
1070 if (bh)
1071 return bh;
1072
1073 ret = grow_buffers(bdev, block, size);
1074 if (ret < 0)
1075 return NULL;
1076 if (ret == 0)
1077 free_more_memory();
1078 }
1079}
1080
/*
 * The relationship between dirty buffers and dirty pages:
 *
 * Whenever a page has any dirty buffers, the page's dirty bit is set, and
 * the page is tagged dirty in its radix tree.
 *
 * At all times, the dirtiness of the buffers represents the dirtiness of
 * subsections of the page.  If the page has buffers, the page dirty bit is
 * merely a hint about the true dirty state.
 *
 * When a page is set dirty in its entirety, all its buffers are marked dirty
 * (if the page has buffers).
 *
 * When a buffer is marked dirty, its page is dirtied, but the page's other
 * buffers are not.
 *
 * Also.  When blockdev buffers are explicitly read with bread(), they
 * individually become uptodate.  But their backing page remains not
 * uptodate - even if all of its buffers are uptodate.  A subsequent
 * block_read_full_page() against that page will discover all the uptodate
 * buffers, will set the page uptodate and will perform no I/O.
 */

/**
 * mark_buffer_dirty - mark a buffer_head as needing writeout
 * @bh: the buffer_head to mark dirty
 *
 * mark_buffer_dirty() will set the dirty bit against the buffer, then set its
 * backing page dirty, then tag the page as dirty in its address_space's radix
 * tree and then attach the address_space's inode to its superblock's dirty
 * inode list.
 *
 * mark_buffer_dirty() is atomic.  It takes bh->b_page->mapping->private_lock,
 * mapping->tree_lock and mapping->host->i_lock.
 */
1116void mark_buffer_dirty(struct buffer_head *bh)
1117{
1118 WARN_ON_ONCE(!buffer_uptodate(bh));
1119
1120 trace_block_dirty_buffer(bh);
1121
	/*
	 * Very *carefully* optimize the it-is-already-dirty case.
	 *
	 * Don't let the final "is it dirty" escape to before we
	 * perhaps modified the buffer.
	 */
1128 if (buffer_dirty(bh)) {
1129 smp_mb();
1130 if (buffer_dirty(bh))
1131 return;
1132 }
1133
1134 if (!test_set_buffer_dirty(bh)) {
1135 struct page *page = bh->b_page;
1136 if (!TestSetPageDirty(page)) {
1137 struct address_space *mapping = page_mapping(page);
1138 if (mapping)
1139 __set_page_dirty(page, mapping, 0);
1140 }
1141 }
1142}
1143EXPORT_SYMBOL(mark_buffer_dirty);
1144
/*
 * Decrement a buffer_head's reference count.  If all buffers against a page
 * have zero reference count, are clean and unlocked, and if the page is clean
 * and unlocked then try_to_free_buffers() may strip the buffers from the page
 * in preparation for freeing it (sometimes, rarely, buffers are removed from
 * a page but it ends up not being freed, and buffers may later be reattached).
 */
1152void __brelse(struct buffer_head * buf)
1153{
1154 if (atomic_read(&buf->b_count)) {
1155 put_bh(buf);
1156 return;
1157 }
1158 WARN(1, KERN_ERR "VFS: brelse: Trying to free free buffer\n");
1159}
1160EXPORT_SYMBOL(__brelse);
1161
/*
 * bforget() is like brelse(), except it discards any
 * potentially dirty data.
 */
1166void __bforget(struct buffer_head *bh)
1167{
1168 clear_buffer_dirty(bh);
1169 if (bh->b_assoc_map) {
1170 struct address_space *buffer_mapping = bh->b_page->mapping;
1171
1172 spin_lock(&buffer_mapping->private_lock);
1173 list_del_init(&bh->b_assoc_buffers);
1174 bh->b_assoc_map = NULL;
1175 spin_unlock(&buffer_mapping->private_lock);
1176 }
1177 __brelse(bh);
1178}
1179EXPORT_SYMBOL(__bforget);
1180
1181static struct buffer_head *__bread_slow(struct buffer_head *bh)
1182{
1183 lock_buffer(bh);
1184 if (buffer_uptodate(bh)) {
1185 unlock_buffer(bh);
1186 return bh;
1187 } else {
1188 get_bh(bh);
1189 bh->b_end_io = end_buffer_read_sync;
1190 submit_bh(READ, bh);
1191 wait_on_buffer(bh);
1192 if (buffer_uptodate(bh))
1193 return bh;
1194 }
1195 brelse(bh);
1196 return NULL;
1197}
1198
/*
 * Per-cpu buffer LRU implementation.  To reduce the cost of __find_get_block().
 * The bhs[] array is sorted - newest buffer is at bhs[0].  Buffers have their
 * refcount elevated by one when they're in an LRU.  A buffer can only appear
 * once in a particular CPU's LRU.  A single buffer can be present in multiple
 * CPU's LRUs at the same time.
 *
 * This is a transparent caching front-end to sb_bread(), sb_getblk() and
 * sb_find_get_block().
 *
 * The LRUs themselves only need locking against invalidate_bh_lrus.  We use
 * a local interrupt disable for that.
 */
1213#define BH_LRU_SIZE 16
1214
1215struct bh_lru {
1216 struct buffer_head *bhs[BH_LRU_SIZE];
1217};
1218
1219static DEFINE_PER_CPU(struct bh_lru, bh_lrus) = {{ NULL }};
1220
1221#ifdef CONFIG_SMP
1222#define bh_lru_lock() local_irq_disable()
1223#define bh_lru_unlock() local_irq_enable()
1224#else
1225#define bh_lru_lock() preempt_disable()
1226#define bh_lru_unlock() preempt_enable()
1227#endif
1228
1229static inline void check_irqs_on(void)
1230{
1231#ifdef irqs_disabled
1232 BUG_ON(irqs_disabled());
1233#endif
1234}
1235
/*
 * Install a buffer_head into this cpu's LRU.  If it is not already there,
 * it is inserted at the head and the oldest entry may be evicted.
 */
1239static void bh_lru_install(struct buffer_head *bh)
1240{
1241 struct buffer_head *evictee = NULL;
1242
1243 check_irqs_on();
1244 bh_lru_lock();
1245 if (__this_cpu_read(bh_lrus.bhs[0]) != bh) {
1246 struct buffer_head *bhs[BH_LRU_SIZE];
1247 int in;
1248 int out = 0;
1249
1250 get_bh(bh);
1251 bhs[out++] = bh;
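		/*
		 * Copy the existing entries in after the new head, dropping
		 * the duplicate of bh (if any) and remembering whatever falls
		 * off the end so it can be released outside the lock.
		 */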
1252 for (in = 0; in < BH_LRU_SIZE; in++) {
1253 struct buffer_head *bh2 =
1254 __this_cpu_read(bh_lrus.bhs[in]);
1255
1256 if (bh2 == bh) {
1257 __brelse(bh2);
1258 } else {
1259 if (out >= BH_LRU_SIZE) {
1260 BUG_ON(evictee != NULL);
1261 evictee = bh2;
1262 } else {
1263 bhs[out++] = bh2;
1264 }
1265 }
1266 }
1267 while (out < BH_LRU_SIZE)
1268 bhs[out++] = NULL;
1269 memcpy(__this_cpu_ptr(&bh_lrus.bhs), bhs, sizeof(bhs));
1270 }
1271 bh_lru_unlock();
1272
1273 if (evictee)
1274 __brelse(evictee);
1275}
1276
/*
 * Look up the bh in this cpu's LRU.  If it's there, move it to the head.
 */
1280static struct buffer_head *
1281lookup_bh_lru(struct block_device *bdev, sector_t block, unsigned size)
1282{
1283 struct buffer_head *ret = NULL;
1284 unsigned int i;
1285
1286 check_irqs_on();
1287 bh_lru_lock();
1288 for (i = 0; i < BH_LRU_SIZE; i++) {
1289 struct buffer_head *bh = __this_cpu_read(bh_lrus.bhs[i]);
1290
1291 if (bh && bh->b_blocknr == block && bh->b_bdev == bdev &&
1292 bh->b_size == size) {
1293 if (i) {
1294 while (i) {
1295 __this_cpu_write(bh_lrus.bhs[i],
1296 __this_cpu_read(bh_lrus.bhs[i - 1]));
1297 i--;
1298 }
1299 __this_cpu_write(bh_lrus.bhs[0], bh);
1300 }
1301 get_bh(bh);
1302 ret = bh;
1303 break;
1304 }
1305 }
1306 bh_lru_unlock();
1307 return ret;
1308}
1309
/*
 * Perform a pagecache lookup for the matching buffer.  If it's there, refresh
 * it in the LRU and mark it as accessed.  If it is not present then return
 * NULL.
 */
1315struct buffer_head *
1316__find_get_block(struct block_device *bdev, sector_t block, unsigned size)
1317{
1318 struct buffer_head *bh = lookup_bh_lru(bdev, block, size);
1319
1320 if (bh == NULL) {
1321 bh = __find_get_block_slow(bdev, block);
1322 if (bh)
1323 bh_lru_install(bh);
1324 }
1325 if (bh)
1326 touch_buffer(bh);
1327 return bh;
1328}
1329EXPORT_SYMBOL(__find_get_block);
1330
/*
 * __getblk will locate (and, if necessary, create) the buffer_head
 * which corresponds to the passed block_device, block and size.  The
 * returned buffer has its reference count incremented.
 *
 * __getblk() will lock up the machine if grow_dev_page's try_to_free_buffers()
 * attempt is failing.  FIXME, perhaps?
 */
1339struct buffer_head *
1340__getblk(struct block_device *bdev, sector_t block, unsigned size)
1341{
1342 struct buffer_head *bh = __find_get_block(bdev, block, size);
1343
1344 might_sleep();
1345 if (bh == NULL)
1346 bh = __getblk_slow(bdev, block, size);
1347 return bh;
1348}
1349EXPORT_SYMBOL(__getblk);
1350
/*
 * Do async read-ahead on a buffer.
 */
1354void __breadahead(struct block_device *bdev, sector_t block, unsigned size)
1355{
1356 struct buffer_head *bh = __getblk(bdev, block, size);
1357 if (likely(bh)) {
1358 ll_rw_block(READA, 1, &bh);
1359 brelse(bh);
1360 }
1361}
1362EXPORT_SYMBOL(__breadahead);
1363
/**
 *  __bread() - reads a specified block and returns the bh
 *  @bdev: the block_device to read from
 *  @block: number of block
 *  @size: size (in bytes) to read
 *
 *  Reads a specified block, and returns buffer head that contains it.
 *  It returns NULL if the block was unreadable.
 */
1373struct buffer_head *
1374__bread(struct block_device *bdev, sector_t block, unsigned size)
1375{
1376 struct buffer_head *bh = __getblk(bdev, block, size);
1377
1378 if (likely(bh) && !buffer_uptodate(bh))
1379 bh = __bread_slow(bh);
1380 return bh;
1381}
1382EXPORT_SYMBOL(__bread);
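/*
 * Typical usage of the __bread()/brelse() pair (illustrative sketch only):
 *
 *	struct buffer_head *bh = __bread(bdev, block, blocksize);
 *	if (bh) {
 *		... examine or copy bh->b_data ...
 *		brelse(bh);
 *	}
 */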
1383
/*
 * invalidate_bh_lrus() is called rarely - but not only at unmount.
 * This doesn't race because it runs in each cpu either in irq
 * or with preempt disabled.
 */
1389static void invalidate_bh_lru(void *arg)
1390{
1391 struct bh_lru *b = &get_cpu_var(bh_lrus);
1392 int i;
1393
1394 for (i = 0; i < BH_LRU_SIZE; i++) {
1395 brelse(b->bhs[i]);
1396 b->bhs[i] = NULL;
1397 }
1398 put_cpu_var(bh_lrus);
1399}
1400
1401static bool has_bh_in_lru(int cpu, void *dummy)
1402{
1403 struct bh_lru *b = per_cpu_ptr(&bh_lrus, cpu);
1404 int i;
1405
1406 for (i = 0; i < BH_LRU_SIZE; i++) {
1407 if (b->bhs[i])
1408 return 1;
1409 }
1410
1411 return 0;
1412}
1413
1414void invalidate_bh_lrus(void)
1415{
1416 on_each_cpu_cond(has_bh_in_lru, invalidate_bh_lru, NULL, 1, GFP_KERNEL);
1417}
1418EXPORT_SYMBOL_GPL(invalidate_bh_lrus);
1419
1420void set_bh_page(struct buffer_head *bh,
1421 struct page *page, unsigned long offset)
1422{
1423 bh->b_page = page;
1424 BUG_ON(offset >= PAGE_SIZE);
1425 if (PageHighMem(page))
		/*
		 * This catches illegal uses and preserves the offset:
		 */
1429 bh->b_data = (char *)(0 + offset);
1430 else
1431 bh->b_data = page_address(page) + offset;
1432}
1433EXPORT_SYMBOL(set_bh_page);
1434
/*
 * Called when truncating a buffer on a page completely.
 */
1438static void discard_buffer(struct buffer_head * bh)
1439{
1440 lock_buffer(bh);
1441 clear_buffer_dirty(bh);
1442 bh->b_bdev = NULL;
1443 clear_buffer_mapped(bh);
1444 clear_buffer_req(bh);
1445 clear_buffer_new(bh);
1446 clear_buffer_delay(bh);
1447 clear_buffer_unwritten(bh);
1448 unlock_buffer(bh);
1449}
1450
/**
 * block_invalidatepage - invalidate part or all of a buffer-backed page
 *
 * @page: the page which is affected
 * @offset: the index of the truncation point
 *
 * block_invalidatepage() is called when all or part of the page has become
 * invalidated by a truncate operation.
 *
 * block_invalidatepage() does not have to release all buffers, but it must
 * ensure that no dirty buffer is left outside @offset and that no I/O
 * is underway against any of the blocks which are outside the truncation
 * point.  Because the caller is about to free (and possibly reuse) those
 * blocks on-disk.
 */
1466void block_invalidatepage(struct page *page, unsigned long offset)
1467{
1468 return block_invalidatepage_range(page, offset,
1469 PAGE_CACHE_SIZE - offset);
1470}
1471EXPORT_SYMBOL(block_invalidatepage);
1472
1473void block_invalidatepage_range(struct page *page, unsigned int offset,
1474 unsigned int length)
1475{
1476 struct buffer_head *head, *bh, *next;
1477 unsigned int curr_off = 0;
1478 unsigned int stop = length + offset;
1479
1480 BUG_ON(!PageLocked(page));
1481 if (!page_has_buffers(page))
1482 goto out;
1483
1484
1485
1486
1487 BUG_ON(stop > PAGE_CACHE_SIZE || stop < length);
1488
1489 head = page_buffers(page);
1490 bh = head;
1491 do {
1492 unsigned int next_off = curr_off + bh->b_size;
1493 next = bh->b_this_page;
1494
1495
1496
1497
1498 if (next_off > stop)
1499 goto out;
1500
1501
1502
1503
1504 if (offset <= curr_off)
1505 discard_buffer(bh);
1506 curr_off = next_off;
1507 bh = next;
1508 } while (bh != head);
1509
	/*
	 * We release buffers only if the entire page is being invalidated.
	 * The get_block cached value has been unconditionally invalidated,
	 * so real IO is not possible anymore.
	 */
1515 if (offset == 0)
1516 try_to_release_page(page, 0);
1517out:
1518 return;
1519}
1520EXPORT_SYMBOL(block_invalidatepage_range);
1521
/*
 * We attach and possibly dirty the buffers atomically wrt
 * __set_page_dirty_buffers() via private_lock.  try_to_free_buffers
 * is already excluded via the page lock.
 */
1527void create_empty_buffers(struct page *page,
1528 unsigned long blocksize, unsigned long b_state)
1529{
1530 struct buffer_head *bh, *head, *tail;
1531
1532 head = alloc_page_buffers(page, blocksize, 1);
1533 bh = head;
1534 do {
1535 bh->b_state |= b_state;
1536 tail = bh;
1537 bh = bh->b_this_page;
1538 } while (bh);
1539 tail->b_this_page = head;
1540
1541 spin_lock(&page->mapping->private_lock);
1542 if (PageUptodate(page) || PageDirty(page)) {
1543 bh = head;
1544 do {
1545 if (PageDirty(page))
1546 set_buffer_dirty(bh);
1547 if (PageUptodate(page))
1548 set_buffer_uptodate(bh);
1549 bh = bh->b_this_page;
1550 } while (bh != head);
1551 }
1552 attach_page_buffers(page, head);
1553 spin_unlock(&page->mapping->private_lock);
1554}
1555EXPORT_SYMBOL(create_empty_buffers);
1556
/*
 * We are taking a block for data and we don't want any output from any
 * buffer-cache aliases starting from return from this function and
 * until the moment when something will explicitly mark the buffer
 * dirty (hopefully that will not happen until we will free that block ;-)
 * We don't even need to mark it not-uptodate - nobody can expect
 * anything from a newly allocated buffer anyway.
 *
 * Also note that bforget() doesn't lock the buffer.  So there can
 * be writeout I/O going on against recently-freed buffers.  We don't
 * wait on that I/O in bforget() - it's more efficient to wait on the I/O
 * only if we really need to.  That happens here.
 */
1573void unmap_underlying_metadata(struct block_device *bdev, sector_t block)
1574{
1575 struct buffer_head *old_bh;
1576
1577 might_sleep();
1578
1579 old_bh = __find_get_block_slow(bdev, block);
1580 if (old_bh) {
1581 clear_buffer_dirty(old_bh);
1582 wait_on_buffer(old_bh);
1583 clear_buffer_req(old_bh);
1584 __brelse(old_bh);
1585 }
1586}
1587EXPORT_SYMBOL(unmap_underlying_metadata);
1588
/*
 * Size is a power-of-two in the range 512..PAGE_SIZE,
 * and the case we care about most is PAGE_SIZE.
 *
 * So this *could* possibly be written with those
 * constraints in mind (relevant mostly if some
 * architecture has a slow bit-scan instruction).
 */
1597static inline int block_size_bits(unsigned int blocksize)
1598{
1599 return ilog2(blocksize);
1600}
1601
1602static struct buffer_head *create_page_buffers(struct page *page, struct inode *inode, unsigned int b_state)
1603{
1604 BUG_ON(!PageLocked(page));
1605
1606 if (!page_has_buffers(page))
1607 create_empty_buffers(page, 1 << ACCESS_ONCE(inode->i_blkbits), b_state);
1608 return page_buffers(page);
1609}
1610
/*
 * NOTE! All mapped/uptodate combinations are valid:
 *
 *	Mapped	Uptodate	Meaning
 *
 *	No	No		"unknown" - must do get_block()
 *	No	Yes		"hole" - zero-filled
 *	Yes	No		"allocated" - allocated on disk, not read in
 *	Yes	Yes		"valid" - allocated and up to date in memory.
 *
 * "Dirty" is valid only with the last case (mapped+uptodate).
 */

/*
 * While block_write_full_page is writing back the dirty buffers under
 * the page lock, whoever dirtied the buffers may decide to clean them
 * again at any time.  We handle that by only looking at the buffer
 * state inside lock_buffer().
 *
 * If block_write_full_page() is called for regular writeback
 * (wbc->sync_mode == WB_SYNC_NONE) then it will redirty a page which has
 * a locked buffer.  This only can happen if someone has written the buffer
 * directly, with submit_bh().  At the address_space level PageWriteback
 * prevents this contention from occurring.
 */
1640int __block_write_full_page(struct inode *inode, struct page *page,
1641 get_block_t *get_block, struct writeback_control *wbc,
1642 bh_end_io_t *handler)
1643{
1644 int err;
1645 sector_t block;
1646 sector_t last_block;
1647 struct buffer_head *bh, *head;
1648 unsigned int blocksize, bbits;
1649 int nr_underway = 0;
1650 int write_op = (wbc->sync_mode == WB_SYNC_ALL ?
1651 WRITE_SYNC : WRITE);
1652
1653 head = create_page_buffers(page, inode,
1654 (1 << BH_Dirty)|(1 << BH_Uptodate));
1655
	/*
	 * Be very careful.  We have no exclusion from __set_page_dirty_buffers
	 * here, and the (potentially unmapped) buffers may become dirty at
	 * any time.  If a buffer becomes dirty here after we've inspected it
	 * then we just miss that fact, and the page stays dirty.
	 *
	 * Buffers outside i_size may be dirtied by __set_page_dirty_buffers;
	 * handle that here by just cleaning them.
	 */
1666 bh = head;
1667 blocksize = bh->b_size;
1668 bbits = block_size_bits(blocksize);
1669
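	/*
	 * block is the first block covered by this page; last_block is the
	 * final block of the file, so anything past it lies beyond EOF.
	 */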
1670 block = (sector_t)page->index << (PAGE_CACHE_SHIFT - bbits);
1671 last_block = (i_size_read(inode) - 1) >> bbits;
1672
1673
1674
1675
1676
1677 do {
1678 if (block > last_block) {
			/*
			 * Mapped buffers outside i_size will occur, because
			 * this page can be outside i_size when there is a
			 * truncate in progress.
			 *
			 * The buffer was zeroed by block_write_full_page();
			 * just treat it as clean and uptodate.
			 */
1687 clear_buffer_dirty(bh);
1688 set_buffer_uptodate(bh);
1689 } else if ((!buffer_mapped(bh) || buffer_delay(bh)) &&
1690 buffer_dirty(bh)) {
1691 WARN_ON(bh->b_size != blocksize);
1692 err = get_block(inode, block, bh, 1);
1693 if (err)
1694 goto recover;
1695 clear_buffer_delay(bh);
1696 if (buffer_new(bh)) {
1697
1698 clear_buffer_new(bh);
1699 unmap_underlying_metadata(bh->b_bdev,
1700 bh->b_blocknr);
1701 }
1702 }
1703 bh = bh->b_this_page;
1704 block++;
1705 } while (bh != head);
1706
1707 do {
1708 if (!buffer_mapped(bh))
1709 continue;
		/*
		 * If it's a fully non-blocking write attempt and we cannot
		 * lock the buffer then redirty the page.  Note that this can
		 * potentially cause a busy-wait loop from writeback threads
		 * and kswapd activity, but those code paths have their own
		 * higher-level throttling.
		 */
1717 if (wbc->sync_mode != WB_SYNC_NONE) {
1718 lock_buffer(bh);
1719 } else if (!trylock_buffer(bh)) {
1720 redirty_page_for_writepage(wbc, page);
1721 continue;
1722 }
1723 if (test_clear_buffer_dirty(bh)) {
1724 mark_buffer_async_write_endio(bh, handler);
1725 } else {
1726 unlock_buffer(bh);
1727 }
1728 } while ((bh = bh->b_this_page) != head);
1729

	/*
	 * The page and its buffers are protected by PageWriteback(), so we can
	 * drop the bh refcounts early.
	 */
1734 BUG_ON(PageWriteback(page));
1735 set_page_writeback(page);
1736
1737 do {
1738 struct buffer_head *next = bh->b_this_page;
1739 if (buffer_async_write(bh)) {
1740 submit_bh(write_op, bh);
1741 nr_underway++;
1742 }
1743 bh = next;
1744 } while (bh != head);
1745 unlock_page(page);
1746
1747 err = 0;
1748done:
1749 if (nr_underway == 0) {
		/*
		 * The page was marked dirty, but the buffers were
		 * clean.  Someone wrote them back by hand with
		 * ll_rw_block/submit_bh.  A rare case.
		 */
1755 end_page_writeback(page);
1756
1757
1758
1759
1760
1761 }
1762 return err;
1763
1764recover:
	/*
	 * ENOSPC, or some other error.  We may already have added some
	 * blocks to the file, so we need to write these out to avoid
	 * exposing stale data.
	 * The page is currently locked and not marked for writeback.
	 */
1771 bh = head;
1772
1773 do {
1774 if (buffer_mapped(bh) && buffer_dirty(bh) &&
1775 !buffer_delay(bh)) {
1776 lock_buffer(bh);
1777 mark_buffer_async_write_endio(bh, handler);
1778 } else {
1779
1780
1781
1782
1783 clear_buffer_dirty(bh);
1784 }
1785 } while ((bh = bh->b_this_page) != head);
1786 SetPageError(page);
1787 BUG_ON(PageWriteback(page));
1788 mapping_set_error(page->mapping, err);
1789 set_page_writeback(page);
1790 do {
1791 struct buffer_head *next = bh->b_this_page;
1792 if (buffer_async_write(bh)) {
1793 clear_buffer_dirty(bh);
1794 submit_bh(write_op, bh);
1795 nr_underway++;
1796 }
1797 bh = next;
1798 } while (bh != head);
1799 unlock_page(page);
1800 goto done;
1801}
1802EXPORT_SYMBOL(__block_write_full_page);
1803
/*
 * If a page has any new buffers, zero them out here, and mark them uptodate
 * and dirty so they'll be written out (in order to prevent uninitialised
 * block data from leaking).  And clear the new bit.
 */
1809void page_zero_new_buffers(struct page *page, unsigned from, unsigned to)
1810{
1811 unsigned int block_start, block_end;
1812 struct buffer_head *head, *bh;
1813
1814 BUG_ON(!PageLocked(page));
1815 if (!page_has_buffers(page))
1816 return;
1817
1818 bh = head = page_buffers(page);
1819 block_start = 0;
1820 do {
1821 block_end = block_start + bh->b_size;
1822
1823 if (buffer_new(bh)) {
1824 if (block_end > from && block_start < to) {
1825 if (!PageUptodate(page)) {
1826 unsigned start, size;
1827
1828 start = max(from, block_start);
1829 size = min(to, block_end) - start;
1830
1831 zero_user(page, start, size);
1832 set_buffer_uptodate(bh);
1833 }
1834
1835 clear_buffer_new(bh);
1836 mark_buffer_dirty(bh);
1837 }
1838 }
1839
1840 block_start = block_end;
1841 bh = bh->b_this_page;
1842 } while (bh != head);
1843}
1844EXPORT_SYMBOL(page_zero_new_buffers);
1845
1846static void
1847iomap_to_bh(struct inode *inode, sector_t block, struct buffer_head *bh,
1848 struct iomap *iomap)
1849{
1850 loff_t offset = block << inode->i_blkbits;
1851
1852 bh->b_bdev = iomap->bdev;

	/*
	 * Block points to offset in file we need to map, iomap contains
	 * the offset at which the map starts.  If the map ends before the
	 * current block, then do not map the buffer and let the caller
	 * handle it.
	 */
1860 BUG_ON(offset >= iomap->offset + iomap->length);
1861
1862 switch (iomap->type) {
1863 case IOMAP_HOLE:
		/*
		 * If the buffer is not up to date or beyond the current EOF,
		 * we need to mark it as new to ensure sub-block zeroing is
		 * executed if necessary.
		 */
1869 if (!buffer_uptodate(bh) ||
1870 (offset >= i_size_read(inode)))
1871 set_buffer_new(bh);
1872 break;
1873 case IOMAP_DELALLOC:
1874 if (!buffer_uptodate(bh) ||
1875 (offset >= i_size_read(inode)))
1876 set_buffer_new(bh);
1877 set_buffer_uptodate(bh);
1878 set_buffer_mapped(bh);
1879 set_buffer_delay(bh);
1880 break;
1881 case IOMAP_UNWRITTEN:
		/*
		 * For unwritten regions we always need to ensure that the
		 * parts of the block we are not writing to are zeroed.
		 * Mark the buffer as new to ensure this.
		 */
1887 set_buffer_new(bh);
1888 set_buffer_unwritten(bh);
		/* FALLTHRU */
1890 case IOMAP_MAPPED:
1891 if (offset >= i_size_read(inode))
1892 set_buffer_new(bh);
1893 bh->b_blocknr = (iomap->addr + offset - iomap->offset) >>
1894 inode->i_blkbits;
1895 set_buffer_mapped(bh);
1896 break;
1897 }
1898}
1899
1900int __block_write_begin_int(struct page *page, loff_t pos, unsigned len,
1901 get_block_t *get_block, struct iomap *iomap)
1902{
1903 unsigned from = pos & (PAGE_CACHE_SIZE - 1);
1904 unsigned to = from + len;
1905 struct inode *inode = page->mapping->host;
1906 unsigned block_start, block_end;
1907 sector_t block;
1908 int err = 0;
1909 unsigned blocksize, bbits;
1910 struct buffer_head *bh, *head, *wait[2], **wait_bh=wait;
1911
1912 BUG_ON(!PageLocked(page));
1913 BUG_ON(from > PAGE_CACHE_SIZE);
1914 BUG_ON(to > PAGE_CACHE_SIZE);
1915 BUG_ON(from > to);
1916
1917 head = create_page_buffers(page, inode, 0);
1918 blocksize = head->b_size;
1919 bbits = block_size_bits(blocksize);
1920
1921 block = (sector_t)page->index << (PAGE_CACHE_SHIFT - bbits);
1922
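	/*
	 * Walk every buffer in the page: map (or iomap) each block that
	 * overlaps the byte range being written, and read in any block that
	 * is only partially overwritten and not already uptodate.
	 */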
1923 for(bh = head, block_start = 0; bh != head || !block_start;
1924 block++, block_start=block_end, bh = bh->b_this_page) {
1925 block_end = block_start + blocksize;
1926 if (block_end <= from || block_start >= to) {
1927 if (PageUptodate(page)) {
1928 if (!buffer_uptodate(bh))
1929 set_buffer_uptodate(bh);
1930 }
1931 continue;
1932 }
1933 if (buffer_new(bh))
1934 clear_buffer_new(bh);
1935 if (!buffer_mapped(bh)) {
1936 WARN_ON(bh->b_size != blocksize);
1937 if (get_block) {
1938 err = get_block(inode, block, bh, 1);
1939 if (err)
1940 break;
1941 } else {
1942 iomap_to_bh(inode, block, bh, iomap);
1943 }
1944
1945 if (buffer_new(bh)) {
1946 unmap_underlying_metadata(bh->b_bdev,
1947 bh->b_blocknr);
1948 if (PageUptodate(page)) {
1949 clear_buffer_new(bh);
1950 set_buffer_uptodate(bh);
1951 mark_buffer_dirty(bh);
1952 continue;
1953 }
1954 if (block_end > to || block_start < from)
1955 zero_user_segments(page,
1956 to, block_end,
1957 block_start, from);
1958 continue;
1959 }
1960 }
1961 if (PageUptodate(page)) {
1962 if (!buffer_uptodate(bh))
1963 set_buffer_uptodate(bh);
1964 continue;
1965 }
1966 if (!buffer_uptodate(bh) && !buffer_delay(bh) &&
1967 !buffer_unwritten(bh) &&
1968 (block_start < from || block_end > to)) {
1969 ll_rw_block(READ, 1, &bh);
1970 *wait_bh++=bh;
1971 }
1972 }
1973
1974
1975
1976 while(wait_bh > wait) {
1977 wait_on_buffer(*--wait_bh);
1978 if (!buffer_uptodate(*wait_bh))
1979 err = -EIO;
1980 }
1981 if (unlikely(err))
1982 page_zero_new_buffers(page, from, to);
1983 return err;
1984}
1985
1986int __block_write_begin(struct page *page, loff_t pos, unsigned len,
1987 get_block_t *get_block)
1988{
1989 return __block_write_begin_int(page, pos, len, get_block, NULL);
1990}
1991EXPORT_SYMBOL(__block_write_begin);
1992
1993static int __block_commit_write(struct inode *inode, struct page *page,
1994 unsigned from, unsigned to)
1995{
1996 unsigned block_start, block_end;
1997 int partial = 0;
1998 unsigned blocksize;
1999 struct buffer_head *bh, *head;
2000
2001 bh = head = page_buffers(page);
2002 blocksize = bh->b_size;
2003
2004 block_start = 0;
2005 do {
2006 block_end = block_start + blocksize;
2007 if (block_end <= from || block_start >= to) {
2008 if (!buffer_uptodate(bh))
2009 partial = 1;
2010 } else {
2011 set_buffer_uptodate(bh);
2012 mark_buffer_dirty(bh);
2013 }
2014 clear_buffer_new(bh);
2015
2016 block_start = block_end;
2017 bh = bh->b_this_page;
2018 } while (bh != head);
2019

	/*
	 * If this is a partial write which happened to make all buffers
	 * uptodate then we can optimize away a bogus readpage() for
	 * the next read().  Here we 'discover' whether the page went
	 * uptodate as a result of this (potentially partial) write.
	 */
2026 if (!partial)
2027 SetPageUptodate(page);
2028 return 0;
2029}
2030
/*
 * block_write_begin takes care of the basic task of block allocation and
 * bringing partial write blocks uptodate first.
 *
 * The filesystem needs to handle block truncation upon failure.
 */
2037int block_write_begin(struct address_space *mapping, loff_t pos, unsigned len,
2038 unsigned flags, struct page **pagep, get_block_t *get_block)
2039{
2040 pgoff_t index = pos >> PAGE_CACHE_SHIFT;
2041 struct page *page;
2042 int status;
2043
2044 page = grab_cache_page_write_begin(mapping, index, flags);
2045 if (!page)
2046 return -ENOMEM;
2047
2048 status = __block_write_begin(page, pos, len, get_block);
2049 if (unlikely(status)) {
2050 unlock_page(page);
2051 page_cache_release(page);
2052 page = NULL;
2053 }
2054
2055 *pagep = page;
2056 return status;
2057}
2058EXPORT_SYMBOL(block_write_begin);
2059
2060int block_write_end(struct file *file, struct address_space *mapping,
2061 loff_t pos, unsigned len, unsigned copied,
2062 struct page *page, void *fsdata)
2063{
2064 struct inode *inode = mapping->host;
2065 unsigned start;
2066
2067 start = pos & (PAGE_CACHE_SIZE - 1);
2068
2069 if (unlikely(copied < len)) {
		/*
		 * The buffers that were written will now be uptodate, so we
		 * don't have to worry about a readpage reading them and
		 * overwriting a partial write.  However if we have encountered
		 * a short write and only partially written into a buffer, it
		 * will not be marked uptodate, so a readpage might come in and
		 * destroy our partial write.
		 *
		 * Do the simplest thing, and just treat any short write to a
		 * non uptodate page as a zero-length write, and force the
		 * caller to redo the whole thing.
		 */
2082 if (!PageUptodate(page))
2083 copied = 0;
2084
2085 page_zero_new_buffers(page, start+copied, start+len);
2086 }
2087 flush_dcache_page(page);
2088
2089
2090 __block_commit_write(inode, page, start, start+copied);
2091
2092 return copied;
2093}
2094EXPORT_SYMBOL(block_write_end);
2095
2096int generic_write_end(struct file *file, struct address_space *mapping,
2097 loff_t pos, unsigned len, unsigned copied,
2098 struct page *page, void *fsdata)
2099{
2100 struct inode *inode = mapping->host;
2101 loff_t old_size = inode->i_size;
2102 int i_size_changed = 0;
2103
2104 copied = block_write_end(file, mapping, pos, len, copied, page, fsdata);
2105
	/*
	 * No need to use i_size_read() here, the i_size
	 * cannot change under us because we hold i_mutex.
	 *
	 * But it's important to update i_size while still holding page lock:
	 * page writeout could otherwise come in and zero beyond i_size.
	 */
2113 if (pos+copied > inode->i_size) {
2114 i_size_write(inode, pos+copied);
2115 i_size_changed = 1;
2116 }
2117
2118 unlock_page(page);
2119 page_cache_release(page);
2120
2121 if (old_size < pos)
2122 pagecache_isize_extended(inode, old_size, pos);
2123
	/*
	 * Don't mark the inode dirty under page lock.  First, it unnecessarily
	 * makes the holding time of page lock longer.  Second, it forces lock
	 * ordering of page lock and transaction start for journaling
	 * filesystems.
	 */
2129 if (i_size_changed)
2130 mark_inode_dirty(inode);
2131
2132 return copied;
2133}
2134EXPORT_SYMBOL(generic_write_end);
2135
/*
 * block_is_partially_uptodate checks whether buffers within a page are
 * uptodate or not.
 *
 * Returns true if all buffers which correspond to the file portion
 * we want to read are uptodate.
 */
2143int block_is_partially_uptodate(struct page *page, read_descriptor_t *desc,
2144 unsigned long from)
2145{
2146 unsigned block_start, block_end, blocksize;
2147 unsigned to;
2148 struct buffer_head *bh, *head;
2149 int ret = 1;
2150
2151 if (!page_has_buffers(page))
2152 return 0;
2153
2154 head = page_buffers(page);
2155 blocksize = head->b_size;
2156 to = min_t(unsigned, PAGE_CACHE_SIZE - from, desc->count);
2157 to = from + to;
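	/*
	 * If the range starts inside the first block and ends inside the
	 * last one it effectively spans the whole page, so per-buffer state
	 * cannot give a better answer than the page flags.
	 */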
2158 if (from < blocksize && to > PAGE_CACHE_SIZE - blocksize)
2159 return 0;
2160
2161 bh = head;
2162 block_start = 0;
2163 do {
2164 block_end = block_start + blocksize;
2165 if (block_end > from && block_start < to) {
2166 if (!buffer_uptodate(bh)) {
2167 ret = 0;
2168 break;
2169 }
2170 if (block_end >= to)
2171 break;
2172 }
2173 block_start = block_end;
2174 bh = bh->b_this_page;
2175 } while (bh != head);
2176
2177 return ret;
2178}
2179EXPORT_SYMBOL(block_is_partially_uptodate);
2180
/*
 * Generic "read page" function for block devices that have the normal
 * get_block functionality.  This is most of the block device filesystems.
 * Reads the page asynchronously --- the unlock_buffer() and
 * set/clear_buffer_uptodate() functions propagate buffer state into the
 * page struct once IO has completed.
 */
2188int block_read_full_page(struct page *page, get_block_t *get_block)
2189{
2190 struct inode *inode = page->mapping->host;
2191 sector_t iblock, lblock;
2192 struct buffer_head *bh, *head, *arr[MAX_BUF_PER_PAGE];
2193 unsigned int blocksize, bbits;
2194 int nr, i;
2195 int fully_mapped = 1;
2196
2197 head = create_page_buffers(page, inode, 0);
2198 blocksize = head->b_size;
2199 bbits = block_size_bits(blocksize);
2200
2201 iblock = (sector_t)page->index << (PAGE_CACHE_SHIFT - bbits);
2202 lblock = (i_size_read(inode)+blocksize-1) >> bbits;
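	/*
	 * iblock is the first block of this page; lblock is the first block
	 * beyond EOF, so blocks at or past it are read as zeroes.
	 */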
2203 bh = head;
2204 nr = 0;
2205 i = 0;
2206
2207 do {
2208 if (buffer_uptodate(bh))
2209 continue;
2210
2211 if (!buffer_mapped(bh)) {
2212 int err = 0;
2213
2214 fully_mapped = 0;
2215 if (iblock < lblock) {
2216 WARN_ON(bh->b_size != blocksize);
2217 err = get_block(inode, iblock, bh, 0);
2218 if (err)
2219 SetPageError(page);
2220 }
2221 if (!buffer_mapped(bh)) {
2222 zero_user(page, i * blocksize, blocksize);
2223 if (!err)
2224 set_buffer_uptodate(bh);
2225 continue;
2226 }
2227
2228
2229
2230
2231 if (buffer_uptodate(bh))
2232 continue;
2233 }
2234 arr[nr++] = bh;
2235 } while (i++, iblock++, (bh = bh->b_this_page) != head);
2236
2237 if (fully_mapped)
2238 SetPageMappedToDisk(page);
2239
2240 if (!nr) {
		/*
		 * All buffers are uptodate - we can set the page uptodate
		 * as well.  But not if get_block() returned an error.
		 */
2245 if (!PageError(page))
2246 SetPageUptodate(page);
2247 unlock_page(page);
2248 return 0;
2249 }
2250
2251
2252 for (i = 0; i < nr; i++) {
2253 bh = arr[i];
2254 lock_buffer(bh);
2255 mark_buffer_async_read(bh);
2256 }

	/*
	 * Stage 3: start the IO.  Check for uptodateness
	 * inside the buffer lock in case another process reading
	 * the underlying blockdev brought it uptodate (the sct fix).
	 */
2263 for (i = 0; i < nr; i++) {
2264 bh = arr[i];
2265 if (buffer_uptodate(bh))
2266 end_buffer_async_read(bh, 1);
2267 else
2268 submit_bh(READ, bh);
2269 }
2270 return 0;
2271}
2272EXPORT_SYMBOL(block_read_full_page);
2273
/*
 * Utility function for filesystems that need to do work on expanding
 * truncates.  Uses filesystem pagecache writes to allow the filesystem to
 * deal with the hole.
 */
2278int generic_cont_expand_simple(struct inode *inode, loff_t size)
2279{
2280 struct address_space *mapping = inode->i_mapping;
2281 struct page *page;
2282 void *fsdata;
2283 int err;
2284
2285 err = inode_newsize_ok(inode, size);
2286 if (err)
2287 goto out;
2288
2289 err = pagecache_write_begin(NULL, mapping, size, 0,
2290 AOP_FLAG_UNINTERRUPTIBLE|AOP_FLAG_CONT_EXPAND,
2291 &page, &fsdata);
2292 if (err)
2293 goto out;
2294
2295 err = pagecache_write_end(NULL, mapping, size, 0, 0, page, fsdata);
2296 BUG_ON(err > 0);
2297
2298out:
2299 return err;
2300}
2301EXPORT_SYMBOL(generic_cont_expand_simple);
2302
2303static int cont_expand_zero(struct file *file, struct address_space *mapping,
2304 loff_t pos, loff_t *bytes)
2305{
2306 struct inode *inode = mapping->host;
2307 unsigned blocksize = 1 << inode->i_blkbits;
2308 struct page *page;
2309 void *fsdata;
2310 pgoff_t index, curidx;
2311 loff_t curpos;
2312 unsigned zerofrom, offset, len;
2313 int err = 0;
2314
2315 index = pos >> PAGE_CACHE_SHIFT;
2316 offset = pos & ~PAGE_CACHE_MASK;
2317
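	/*
	 * Zero out, one page at a time, the range between the current end of
	 * data (*bytes) and the page being written to, rounding *bytes up to
	 * a block boundary as we go.
	 */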
2318 while (index > (curidx = (curpos = *bytes)>>PAGE_CACHE_SHIFT)) {
2319 zerofrom = curpos & ~PAGE_CACHE_MASK;
2320 if (zerofrom & (blocksize-1)) {
2321 *bytes |= (blocksize-1);
2322 (*bytes)++;
2323 }
2324 len = PAGE_CACHE_SIZE - zerofrom;
2325
2326 err = pagecache_write_begin(file, mapping, curpos, len,
2327 AOP_FLAG_UNINTERRUPTIBLE,
2328 &page, &fsdata);
2329 if (err)
2330 goto out;
2331 zero_user(page, zerofrom, len);
2332 err = pagecache_write_end(file, mapping, curpos, len, len,
2333 page, fsdata);
2334 if (err < 0)
2335 goto out;
2336 BUG_ON(err != len);
2337 err = 0;
2338
2339 balance_dirty_pages_ratelimited(mapping);
2340 }
2341
2342
2343 if (index == curidx) {
2344 zerofrom = curpos & ~PAGE_CACHE_MASK;
2345
2346 if (offset <= zerofrom) {
2347 goto out;
2348 }
2349 if (zerofrom & (blocksize-1)) {
2350 *bytes |= (blocksize-1);
2351 (*bytes)++;
2352 }
2353 len = offset - zerofrom;
2354
2355 err = pagecache_write_begin(file, mapping, curpos, len,
2356 AOP_FLAG_UNINTERRUPTIBLE,
2357 &page, &fsdata);
2358 if (err)
2359 goto out;
2360 zero_user(page, zerofrom, len);
2361 err = pagecache_write_end(file, mapping, curpos, len, len,
2362 page, fsdata);
2363 if (err < 0)
2364 goto out;
2365 BUG_ON(err != len);
2366 err = 0;
2367 }
2368out:
2369 return err;
2370}
2371
/*
 * For filesystems that do not allow holes in a file.
 * We may have to extend the file.
 */
2376int cont_write_begin(struct file *file, struct address_space *mapping,
2377 loff_t pos, unsigned len, unsigned flags,
2378 struct page **pagep, void **fsdata,
2379 get_block_t *get_block, loff_t *bytes)
2380{
2381 struct inode *inode = mapping->host;
2382 unsigned blocksize = 1 << inode->i_blkbits;
2383 unsigned zerofrom;
2384 int err;
2385
2386 err = cont_expand_zero(file, mapping, pos, bytes);
2387 if (err)
2388 return err;
2389
2390 zerofrom = *bytes & ~PAGE_CACHE_MASK;
2391 if (pos+len > *bytes && zerofrom & (blocksize-1)) {
2392 *bytes |= (blocksize-1);
2393 (*bytes)++;
2394 }
2395
2396 return block_write_begin(mapping, pos, len, flags, pagep, get_block);
2397}
2398EXPORT_SYMBOL(cont_write_begin);
2399
2400int block_commit_write(struct page *page, unsigned from, unsigned to)
2401{
2402 struct inode *inode = page->mapping->host;
2403 __block_commit_write(inode,page,from,to);
2404 return 0;
2405}
2406EXPORT_SYMBOL(block_commit_write);
2407
/*
 * block_page_mkwrite() is not allowed to change the file size as it gets
 * called from a page fault handler when a page is first dirtied.  Hence we must
 * be careful to check for EOF conditions here.  We set the page up correctly
 * for a written page which means we get ENOSPC checking when writing into
 * holes and correct delalloc and unwritten extent mapping on filesystems that
 * support these features.
 *
 * We are not allowed to take the i_mutex here so we have to play games to
 * protect against truncate races as the page could now be beyond EOF.  Because
 * truncate writes the inode size before removing pages, once we have the
 * page lock we can determine safely if the page is beyond EOF.  If it is not
 * beyond EOF, then the page is guaranteed safe against truncation until we
 * unlock the page.
 *
 * Direct callers of this function should protect against filesystem freezing
 * using sb_start_pagefault() - sb_end_pagefault() functions.
 */
2426int __block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
2427 get_block_t get_block)
2428{
2429 struct page *page = vmf->page;
2430 struct inode *inode = file_inode(vma->vm_file);
2431 unsigned long end;
2432 loff_t size;
2433 int ret;
2434
2435 lock_page(page);
2436 size = i_size_read(inode);
2437 if ((page->mapping != inode->i_mapping) ||
2438 (page_offset(page) > size)) {
2439
2440 ret = -EFAULT;
2441 goto out_unlock;
2442 }
2443
2444
2445 if (((page->index + 1) << PAGE_CACHE_SHIFT) > size)
2446 end = size & ~PAGE_CACHE_MASK;
2447 else
2448 end = PAGE_CACHE_SIZE;
2449
2450 ret = __block_write_begin(page, 0, end, get_block);
2451 if (!ret)
2452 ret = block_commit_write(page, 0, end);
2453
2454 if (unlikely(ret < 0))
2455 goto out_unlock;
2456 set_page_dirty(page);
2457 wait_for_stable_page(page);
2458 return 0;
2459out_unlock:
2460 unlock_page(page);
2461 return ret;
2462}
2463EXPORT_SYMBOL(__block_page_mkwrite);
2464
2465int block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
2466 get_block_t get_block)
2467{
2468 int ret;
2469 struct super_block *sb = file_inode(vma->vm_file)->i_sb;
2470
2471 sb_start_pagefault(sb);

	/*
	 * Update file times before taking page lock.  We may end up failing
	 * the fault so this update may be superfluous but who really cares...
	 */
2477 file_update_time(vma->vm_file);
2478
2479 ret = __block_page_mkwrite(vma, vmf, get_block);
2480 sb_end_pagefault(sb);
2481 return block_page_mkwrite_return(ret);
2482}
2483EXPORT_SYMBOL(block_page_mkwrite);
2484
/*
 * nobh_write_begin()'s prereads are special: the buffer_heads are freed
 * immediately, while under the page lock.  So it needs a special end_io
 * handler which does not touch the bh after unlocking it.
 */
2490static void end_buffer_read_nobh(struct buffer_head *bh, int uptodate)
2491{
2492 __end_buffer_read_notouch(bh, uptodate);
2493}
2494
/*
 * Attach the singly-linked list of buffers created by nobh_write_begin to
 * the page (converting it to a circular linked list and taking care of page
 * dirty races).
 */
2500static void attach_nobh_buffers(struct page *page, struct buffer_head *head)
2501{
2502 struct buffer_head *bh;
2503
2504 BUG_ON(!PageLocked(page));
2505
2506 spin_lock(&page->mapping->private_lock);
2507 bh = head;
2508 do {
2509 if (PageDirty(page))
2510 set_buffer_dirty(bh);
2511 if (!bh->b_this_page)
2512 bh->b_this_page = head;
2513 bh = bh->b_this_page;
2514 } while (bh != head);
2515 attach_page_buffers(page, head);
2516 spin_unlock(&page->mapping->private_lock);
2517}
2518
/*
 * On entry, the page is fully not uptodate.
 * On exit the page is fully uptodate in the areas outside (from,to).
 * The filesystem needs to handle block truncation upon failure.
 */
2524int nobh_write_begin(struct address_space *mapping,
2525 loff_t pos, unsigned len, unsigned flags,
2526 struct page **pagep, void **fsdata,
2527 get_block_t *get_block)
2528{
2529 struct inode *inode = mapping->host;
2530 const unsigned blkbits = inode->i_blkbits;
2531 const unsigned blocksize = 1 << blkbits;
2532 struct buffer_head *head, *bh;
2533 struct page *page;
2534 pgoff_t index;
2535 unsigned from, to;
2536 unsigned block_in_page;
2537 unsigned block_start, block_end;
2538 sector_t block_in_file;
2539 int nr_reads = 0;
2540 int ret = 0;
2541 int is_mapped_to_disk = 1;
2542
2543 index = pos >> PAGE_CACHE_SHIFT;
2544 from = pos & (PAGE_CACHE_SIZE - 1);
2545 to = from + len;
2546
2547 page = grab_cache_page_write_begin(mapping, index, flags);
2548 if (!page)
2549 return -ENOMEM;
2550 *pagep = page;
2551 *fsdata = NULL;
2552
2553 if (page_has_buffers(page)) {
2554 ret = __block_write_begin(page, pos, len, get_block);
2555 if (unlikely(ret))
2556 goto out_release;
2557 return ret;
2558 }
2559
2560 if (PageMappedToDisk(page))
2561 return 0;

	/*
	 * Allocate buffer heads to track per-block state, but do not attach
	 * them to the page.  On success they are handed back to
	 * nobh_write_end() through *fsdata and freed there; they are only
	 * attached to the page if an error occurs or the copy is short.
	 */
2572 head = alloc_page_buffers(page, blocksize, 0);
2573 if (!head) {
2574 ret = -ENOMEM;
2575 goto out_release;
2576 }
2577
2578 block_in_file = (sector_t)page->index << (PAGE_CACHE_SHIFT - blkbits);

	/*
	 * We loop across all blocks in the page, whether or not they are
	 * part of the affected region.  This is so we can discover if the
	 * page is fully mapped-to-disk.
	 */
2585 for (block_start = 0, block_in_page = 0, bh = head;
2586 block_start < PAGE_CACHE_SIZE;
2587 block_in_page++, block_start += blocksize, bh = bh->b_this_page) {
2588 int create;
2589
2590 block_end = block_start + blocksize;
2591 bh->b_state = 0;
2592 create = 1;
2593 if (block_start >= to)
2594 create = 0;
2595 ret = get_block(inode, block_in_file + block_in_page,
2596 bh, create);
2597 if (ret)
2598 goto failed;
2599 if (!buffer_mapped(bh))
2600 is_mapped_to_disk = 0;
2601 if (buffer_new(bh))
2602 unmap_underlying_metadata(bh->b_bdev, bh->b_blocknr);
2603 if (PageUptodate(page)) {
2604 set_buffer_uptodate(bh);
2605 continue;
2606 }
2607 if (buffer_new(bh) || !buffer_mapped(bh)) {
2608 zero_user_segments(page, block_start, from,
2609 to, block_end);
2610 continue;
2611 }
2612 if (buffer_uptodate(bh))
2613 continue;
2614 if (block_start < from || block_end > to) {
2615 lock_buffer(bh);
2616 bh->b_end_io = end_buffer_read_nobh;
2617 submit_bh(READ, bh);
2618 nr_reads++;
2619 }
2620 }
2621
2622 if (nr_reads) {
		/*
		 * The page is locked, so these buffers are protected from
		 * any VM or truncate activity.  Hence we don't need to care
		 * for the buffer_head refcounts.
		 */
2628 for (bh = head; bh; bh = bh->b_this_page) {
2629 wait_on_buffer(bh);
2630 if (!buffer_uptodate(bh))
2631 ret = -EIO;
2632 }
2633 if (ret)
2634 goto failed;
2635 }
2636
2637 if (is_mapped_to_disk)
2638 SetPageMappedToDisk(page);
2639
2640 *fsdata = head;
2641
2642 return 0;
2643
2644failed:
2645 BUG_ON(!ret);
	/*
	 * Error recovery is a bit difficult.  We need to zero out blocks that
	 * were newly allocated, and dirty them to ensure they get written out.
	 * Buffers need to be attached to the page at this point, otherwise
	 * the handling of potential IO errors during writeout would be hard
	 * (could try doing synchronous writeout, but what if that fails too?).
	 */
2653 attach_nobh_buffers(page, head);
2654 page_zero_new_buffers(page, from, to);
2655
2656out_release:
2657 unlock_page(page);
2658 page_cache_release(page);
2659 *pagep = NULL;
2660
2661 return ret;
2662}
2663EXPORT_SYMBOL(nobh_write_begin);
2664
2665int nobh_write_end(struct file *file, struct address_space *mapping,
2666 loff_t pos, unsigned len, unsigned copied,
2667 struct page *page, void *fsdata)
2668{
2669 struct inode *inode = page->mapping->host;
2670 struct buffer_head *head = fsdata;
2671 struct buffer_head *bh;
2672 BUG_ON(fsdata != NULL && page_has_buffers(page));
2673
2674 if (unlikely(copied < len) && head)
2675 attach_nobh_buffers(page, head);
2676 if (page_has_buffers(page))
2677 return generic_write_end(file, mapping, pos, len,
2678 copied, page, fsdata);
2679
2680 SetPageUptodate(page);
2681 set_page_dirty(page);
2682 if (pos+copied > inode->i_size) {
2683 i_size_write(inode, pos+copied);
2684 mark_inode_dirty(inode);
2685 }
2686
2687 unlock_page(page);
2688 page_cache_release(page);
2689
2690 while (head) {
2691 bh = head;
2692 head = head->b_this_page;
2693 free_buffer_head(bh);
2694 }
2695
2696 return copied;
2697}
2698EXPORT_SYMBOL(nobh_write_end);
2699
/*
 * nobh_writepage() - based on block_write_full_page() except
 * that it tries to operate without attaching bufferheads to
 * the page.
 */
2705int nobh_writepage(struct page *page, get_block_t *get_block,
2706 struct writeback_control *wbc)
2707{
2708 struct inode * const inode = page->mapping->host;
2709 loff_t i_size = i_size_read(inode);
2710 const pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT;
2711 unsigned offset;
2712 int ret;
2713
2714
2715 if (page->index < end_index)
2716 goto out;
2717
2718
2719 offset = i_size & (PAGE_CACHE_SIZE-1);
2720 if (page->index >= end_index+1 || !offset) {
2721
2722
2723
2724
2725
2726#if 0
2727
2728 if (page->mapping->a_ops->invalidatepage)
2729 page->mapping->a_ops->invalidatepage(page, offset);
2730#endif
2731 unlock_page(page);
2732 return 0;
2733 }
	/*
	 * The page straddles i_size.  It must be zeroed out on each and every
	 * writepage invocation because it may be mmapped.  "A file is mapped
	 * in multiples of the page size.  For a file that is not a multiple of
	 * the page size, the remaining memory is zeroed when mapped, and
	 * writes to that region are not written out to the file."
	 */
2742 zero_user_segment(page, offset, PAGE_CACHE_SIZE);
2743out:
2744 ret = mpage_writepage(page, get_block, wbc);
2745 if (ret == -EAGAIN)
2746 ret = __block_write_full_page(inode, page, get_block, wbc,
2747 end_buffer_async_write);
2748 return ret;
2749}
2750EXPORT_SYMBOL(nobh_writepage);
2751
2752int nobh_truncate_page(struct address_space *mapping,
2753 loff_t from, get_block_t *get_block)
2754{
2755 pgoff_t index = from >> PAGE_CACHE_SHIFT;
2756 unsigned offset = from & (PAGE_CACHE_SIZE-1);
2757 unsigned blocksize;
2758 sector_t iblock;
2759 unsigned length, pos;
2760 struct inode *inode = mapping->host;
2761 struct page *page;
2762 struct buffer_head map_bh;
2763 int err;
2764
2765 blocksize = 1 << inode->i_blkbits;
2766 length = offset & (blocksize - 1);
2767
2768
2769 if (!length)
2770 return 0;
2771
2772 length = blocksize - length;
2773 iblock = (sector_t)index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
2774
2775 page = grab_cache_page(mapping, index);
2776 err = -ENOMEM;
2777 if (!page)
2778 goto out;
2779
2780 if (page_has_buffers(page)) {
2781has_buffers:
2782 unlock_page(page);
2783 page_cache_release(page);
2784 return block_truncate_page(mapping, from, get_block);
2785 }
2786
2787
2788 pos = blocksize;
2789 while (offset >= pos) {
2790 iblock++;
2791 pos += blocksize;
2792 }
2793
2794 map_bh.b_size = blocksize;
2795 map_bh.b_state = 0;
2796 err = get_block(inode, iblock, &map_bh, 0);
2797 if (err)
2798 goto unlock;
2799
2800 if (!buffer_mapped(&map_bh))
2801 goto unlock;
2802
2803
2804 if (!PageUptodate(page)) {
2805 err = mapping->a_ops->readpage(NULL, page);
2806 if (err) {
2807 page_cache_release(page);
2808 goto out;
2809 }
2810 lock_page(page);
2811 if (!PageUptodate(page)) {
2812 err = -EIO;
2813 goto unlock;
2814 }
2815 if (page_has_buffers(page))
2816 goto has_buffers;
2817 }
2818 zero_user(page, offset, length);
2819 set_page_dirty(page);
2820 err = 0;
2821
2822unlock:
2823 unlock_page(page);
2824 page_cache_release(page);
2825out:
2826 return err;
2827}
2828EXPORT_SYMBOL(nobh_truncate_page);
2829
int block_truncate_page(struct address_space *mapping,
                        loff_t from, get_block_t *get_block)
{
        pgoff_t index = from >> PAGE_CACHE_SHIFT;
        unsigned offset = from & (PAGE_CACHE_SIZE-1);
        unsigned blocksize;
        sector_t iblock;
        unsigned length, pos;
        struct inode *inode = mapping->host;
        struct page *page;
        struct buffer_head *bh;
        int err;

        blocksize = 1 << inode->i_blkbits;
        length = offset & (blocksize - 1);

        /* Block boundary? Nothing to do */
        if (!length)
                return 0;

        length = blocksize - length;
        iblock = (sector_t)index << (PAGE_CACHE_SHIFT - inode->i_blkbits);

        page = grab_cache_page(mapping, index);
        err = -ENOMEM;
        if (!page)
                goto out;

        if (!page_has_buffers(page))
                create_empty_buffers(page, blocksize, 0);

        /* Find the buffer that contains "offset" */
        bh = page_buffers(page);
        pos = blocksize;
        while (offset >= pos) {
                bh = bh->b_this_page;
                iblock++;
                pos += blocksize;
        }

        err = 0;
        if (!buffer_mapped(bh)) {
                WARN_ON(bh->b_size != blocksize);
                err = get_block(inode, iblock, bh, 0);
                if (err)
                        goto unlock;
                /* Unmapped? It's a hole - nothing to do */
                if (!buffer_mapped(bh))
                        goto unlock;
        }

        /* Ok, it's mapped. Make sure it's up-to-date */
        if (PageUptodate(page))
                set_buffer_uptodate(bh);

        if (!buffer_uptodate(bh) && !buffer_delay(bh) && !buffer_unwritten(bh)) {
                err = -EIO;
                ll_rw_block(READ, 1, &bh);
                wait_on_buffer(bh);
                /* Uhhuh. Read error. Complain and punt. */
                if (!buffer_uptodate(bh))
                        goto unlock;
        }

        zero_user(page, offset, length);
        mark_buffer_dirty(bh);
        err = 0;

unlock:
        unlock_page(page);
        page_cache_release(page);
out:
        return err;
}
EXPORT_SYMBOL(block_truncate_page);
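
/*
 * Illustrative sketch (not part of this file): a block-based filesystem
 * would typically call block_truncate_page() from its truncate/setattr
 * path to zero out the partial block beyond the new EOF, passing its own
 * get_block routine.  "myfs_get_block" and "myfs_truncate_tail" are
 * hypothetical names used only to make the calling convention concrete:
 *
 *      static int myfs_truncate_tail(struct inode *inode, loff_t newsize)
 *      {
 *              return block_truncate_page(inode->i_mapping, newsize,
 *                                         myfs_get_block);
 *      }
 */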

/*
 * The generic ->writepage function for buffer-backed address_spaces.
 */
int block_write_full_page(struct page *page, get_block_t *get_block,
                        struct writeback_control *wbc)
{
        struct inode * const inode = page->mapping->host;
        loff_t i_size = i_size_read(inode);
        const pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT;
        unsigned offset;

        /* Is the page fully inside i_size? */
        if (page->index < end_index)
                return __block_write_full_page(inode, page, get_block, wbc,
                                               end_buffer_async_write);

        /* Is the page fully outside i_size? (truncate in progress) */
        offset = i_size & (PAGE_CACHE_SIZE-1);
        if (page->index >= end_index+1 || !offset) {
                /*
                 * The page may have dirty, unmapped buffers.  Make them
                 * freeable here, so the page does not leak.
                 */
                do_invalidatepage(page, 0);
                unlock_page(page);
                return 0;
        }

        /*
         * The page straddles i_size.  It must be zeroed out on each and every
         * writepage invocation because it may be mmapped: memory beyond EOF
         * in the final partial page is zeroed when mapped, and writes to that
         * region are not written out to the file.
         */
        zero_user_segment(page, offset, PAGE_CACHE_SIZE);
        return __block_write_full_page(inode, page, get_block, wbc,
                                       end_buffer_async_write);
}
EXPORT_SYMBOL(block_write_full_page);
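
/*
 * Illustrative sketch (not part of this file): a filesystem usually wires
 * this helper straight into its address_space_operations, supplying its own
 * get_block routine.  "myfs_get_block", "myfs_writepage" and "myfs_aops"
 * are hypothetical names used only for the example:
 *
 *      static int myfs_writepage(struct page *page,
 *                                struct writeback_control *wbc)
 *      {
 *              return block_write_full_page(page, myfs_get_block, wbc);
 *      }
 *
 *      static const struct address_space_operations myfs_aops = {
 *              .writepage      = myfs_writepage,
 *              ...
 *      };
 */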

sector_t generic_block_bmap(struct address_space *mapping, sector_t block,
                            get_block_t *get_block)
{
        struct buffer_head tmp;
        struct inode *inode = mapping->host;
        tmp.b_state = 0;
        tmp.b_blocknr = 0;
        tmp.b_size = 1 << inode->i_blkbits;
        get_block(inode, block, &tmp, 0);
        return tmp.b_blocknr;
}
EXPORT_SYMBOL(generic_block_bmap);
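
/*
 * Illustrative sketch (not part of this file): generic_block_bmap() is
 * normally exposed through ->bmap so that FIBMAP can translate a file block
 * number into a device block number.  "myfs_get_block" and "myfs_bmap" are
 * hypothetical names:
 *
 *      static sector_t myfs_bmap(struct address_space *mapping,
 *                                sector_t block)
 *      {
 *              return generic_block_bmap(mapping, block, myfs_get_block);
 *      }
 */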

static void end_bio_bh_io_sync(struct bio *bio, int err)
{
        struct buffer_head *bh = bio->bi_private;

        if (err == -EOPNOTSUPP)
                set_bit(BIO_EOPNOTSUPP, &bio->bi_flags);

        if (unlikely(test_bit(BIO_QUIET, &bio->bi_flags)))
                set_bit(BH_Quiet, &bh->b_state);

        bh->b_end_io(bh, test_bit(BIO_UPTODATE, &bio->bi_flags));
        bio_put(bio);
}

/*
 * This allows us to do IO even on the odd last sectors of a device, even if
 * the block size is some multiple of the physical sector size.
 *
 * We'll just truncate the bio to the size of the device, and clear the end
 * of the buffer head manually.  Truly out-of-range accesses will turn into
 * actual I/O errors; this only handles the "we need to be able to do I/O at
 * the final sector" case.
 */
void guard_bio_eod(int rw, struct bio *bio)
{
        sector_t maxsector;
        struct bio_vec *bvec = &bio->bi_io_vec[bio->bi_vcnt - 1];
        unsigned truncated_bytes;

        maxsector = i_size_read(bio->bi_bdev->bd_inode) >> 9;
        if (!maxsector)
                return;

        /*
         * If the *whole* IO is past the end of the device, let it through,
         * and the IO layer will turn it into an EIO.
         */
        if (unlikely(bio->bi_sector >= maxsector))
                return;

        maxsector -= bio->bi_sector;
        if (likely((bio->bi_size >> 9) <= maxsector))
                return;

        /* Uhhuh. We've got a bio that straddles the device size! */
        truncated_bytes = bio->bi_size - (maxsector << 9);

        /* Truncate the bio.. */
        bio->bi_size -= truncated_bytes;
        bvec->bv_len -= truncated_bytes;

        /* ..and clear the end of the buffer for reads */
        if ((rw & RW_MASK) == READ) {
                zero_user(bvec->bv_page, bvec->bv_offset + bvec->bv_len,
                          truncated_bytes);
        }
}

int _submit_bh(int rw, struct buffer_head *bh, unsigned long bio_flags)
{
        struct bio *bio;
        int ret = 0;

        BUG_ON(!buffer_locked(bh));
        BUG_ON(!buffer_mapped(bh));
        BUG_ON(!bh->b_end_io);
        BUG_ON(buffer_delay(bh));
        BUG_ON(buffer_unwritten(bh));

        /*
         * The buffer has been submitted before: if we are writing it again,
         * forget any write I/O error left over from the earlier attempt.
         */
        if (test_set_buffer_req(bh) && (rw & WRITE))
                clear_buffer_write_io_error(bh);

        /*
         * from here on down, it's all bio -- do the initial mapping,
         * submit_bio -> generic_make_request may further map this bio around
         */
        bio = bio_alloc(GFP_NOIO, 1);

        bio->bi_sector = bh->b_blocknr * (bh->b_size >> 9);
        bio->bi_bdev = bh->b_bdev;
        bio->bi_io_vec[0].bv_page = bh->b_page;
        bio->bi_io_vec[0].bv_len = bh->b_size;
        bio->bi_io_vec[0].bv_offset = bh_offset(bh);

        bio->bi_vcnt = 1;
        bio->bi_size = bh->b_size;

        bio->bi_end_io = end_bio_bh_io_sync;
        bio->bi_private = bh;
        bio->bi_flags |= bio_flags;

        /* Take care of bh's that straddle the end of the device */
        guard_bio_eod(rw, bio);

        if (buffer_meta(bh))
                rw |= REQ_META;
        if (buffer_prio(bh))
                rw |= REQ_PRIO;

        bio_get(bio);
        submit_bio(rw, bio);

        if (bio_flagged(bio, BIO_EOPNOTSUPP))
                ret = -EOPNOTSUPP;

        bio_put(bio);
        return ret;
}
EXPORT_SYMBOL_GPL(_submit_bh);

int submit_bh(int rw, struct buffer_head *bh)
{
        return _submit_bh(rw, bh, 0);
}
EXPORT_SYMBOL(submit_bh);

/**
 * ll_rw_block: low-level access to block devices (DEPRECATED)
 * @rw: whether to %READ or %WRITE
 * @nr: number of &struct buffer_heads in the array
 * @bhs: array of pointers to &struct buffer_head
 *
 * ll_rw_block() takes an array of pointers to &struct buffer_heads and
 * requests an I/O operation on them, either a %READ or a %WRITE.
 *
 * Buffers that cannot be locked without blocking are skipped, as are
 * buffers that are already clean for a %WRITE request or already
 * up-to-date for a %READ request; such buffers are left unlocked and
 * untouched.  For the buffers that are submitted, b_end_io is set to a
 * simple completion handler that marks the buffer up-to-date (if
 * appropriate), unlocks it and wakes any waiters.
 *
 * All of the buffers must be for the same device.
 */
void ll_rw_block(int rw, int nr, struct buffer_head *bhs[])
{
        int i;

        for (i = 0; i < nr; i++) {
                struct buffer_head *bh = bhs[i];

                if (!trylock_buffer(bh))
                        continue;
                if (rw == WRITE) {
                        if (test_clear_buffer_dirty(bh)) {
                                bh->b_end_io = end_buffer_write_sync;
                                get_bh(bh);
                                submit_bh(WRITE, bh);
                                continue;
                        }
                } else {
                        if (!buffer_uptodate(bh)) {
                                bh->b_end_io = end_buffer_read_sync;
                                get_bh(bh);
                                submit_bh(rw, bh);
                                continue;
                        }
                }
                unlock_buffer(bh);
        }
}
EXPORT_SYMBOL(ll_rw_block);
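
/*
 * Illustrative sketch (not part of this file): the common synchronous-read
 * pattern built on ll_rw_block().  The buffer is submitted only if it is
 * not already up to date, and then waited on before its contents are used:
 *
 *      if (!buffer_uptodate(bh)) {
 *              ll_rw_block(READ, 1, &bh);
 *              wait_on_buffer(bh);
 *              if (!buffer_uptodate(bh))
 *                      return -EIO;
 *      }
 */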

void write_dirty_buffer(struct buffer_head *bh, int rw)
{
        lock_buffer(bh);
        if (!test_clear_buffer_dirty(bh)) {
                unlock_buffer(bh);
                return;
        }
        bh->b_end_io = end_buffer_write_sync;
        get_bh(bh);
        submit_bh(rw, bh);
}
EXPORT_SYMBOL(write_dirty_buffer);

/*
 * For a data-integrity writeout, we need to wait upon any in-progress I/O
 * and then start new I/O and then wait upon it.  The caller must have a ref
 * on the buffer_head.
 */
int __sync_dirty_buffer(struct buffer_head *bh, int rw)
{
        int ret = 0;

        WARN_ON(atomic_read(&bh->b_count) < 1);
        lock_buffer(bh);
        if (test_clear_buffer_dirty(bh)) {
                get_bh(bh);
                bh->b_end_io = end_buffer_write_sync;
                ret = submit_bh(rw, bh);
                wait_on_buffer(bh);
                if (!ret && !buffer_uptodate(bh))
                        ret = -EIO;
        } else {
                unlock_buffer(bh);
        }
        return ret;
}
EXPORT_SYMBOL(__sync_dirty_buffer);

int sync_dirty_buffer(struct buffer_head *bh)
{
        return __sync_dirty_buffer(bh, WRITE_SYNC);
}
EXPORT_SYMBOL(sync_dirty_buffer);
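
/*
 * Illustrative sketch (not part of this file): a metadata update that must
 * reach stable storage before the caller proceeds.  "new_contents" is a
 * stand-in for the caller's data; the buffer is modified, marked dirty and
 * then flushed synchronously:
 *
 *      lock_buffer(bh);
 *      memcpy(bh->b_data, new_contents, bh->b_size);
 *      mark_buffer_dirty(bh);
 *      unlock_buffer(bh);
 *      err = sync_dirty_buffer(bh);    (returns -EIO if the write failed)
 */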

/*
 * try_to_free_buffers() checks if all the buffers on this particular page
 * are unused, and releases them if so.
 *
 * Exclusion against try_to_free_buffers may be obtained by either locking
 * the page or by holding its mapping's private_lock.
 *
 * Buffers are only dropped when none of them is dirty, locked or still
 * referenced.  If they are dropped, the page's dirty bit is cleared as
 * well: a clean page must not be left dirty, because a later reattachment
 * of buffers to a dirty page would mark *all* of those buffers dirty and
 * cause spurious writeback.
 */
static inline int buffer_busy(struct buffer_head *bh)
{
        return atomic_read(&bh->b_count) |
                (bh->b_state & ((1 << BH_Dirty) | (1 << BH_Lock)));
}

static int
drop_buffers(struct page *page, struct buffer_head **buffers_to_free)
{
        struct buffer_head *head = page_buffers(page);
        struct buffer_head *bh;

        bh = head;
        do {
                if (buffer_write_io_error(bh) && page->mapping)
                        set_bit(AS_EIO, &page->mapping->flags);
                if (buffer_busy(bh))
                        goto failed;
                bh = bh->b_this_page;
        } while (bh != head);

        do {
                struct buffer_head *next = bh->b_this_page;

                if (bh->b_assoc_map)
                        __remove_assoc_queue(bh);
                bh = next;
        } while (bh != head);
        *buffers_to_free = head;
        __clear_page_buffers(page);
        return 1;
failed:
        return 0;
}

int try_to_free_buffers(struct page *page)
{
        struct address_space * const mapping = page->mapping;
        struct buffer_head *buffers_to_free = NULL;
        int ret = 0;

        BUG_ON(!PageLocked(page));
        if (PageWriteback(page))
                return 0;

        if (mapping == NULL) {
                ret = drop_buffers(page, &buffers_to_free);
                goto out;
        }

        spin_lock(&mapping->private_lock);
        ret = drop_buffers(page, &buffers_to_free);

        /*
         * If the filesystem writes its buffers by hand (eg ext3)
         * then we can have clean buffers against a dirty page.  We
         * clean the page here; otherwise the VM will never notice
         * that the filesystem did any IO at all.
         *
         * Also, during truncate, discard_buffer will have marked all
         * the page's buffers clean.  We discover that here and clean
         * the page also.
         *
         * private_lock must be held over this entire operation in order
         * to synchronise against __set_page_dirty_buffers and prevent the
         * dirty bit from being lost.
         */
        if (ret)
                cancel_dirty_page(page, PAGE_CACHE_SIZE);
        spin_unlock(&mapping->private_lock);
out:
        if (buffers_to_free) {
                struct buffer_head *bh = buffers_to_free;

                do {
                        struct buffer_head *next = bh->b_this_page;
                        free_buffer_head(bh);
                        bh = next;
                } while (bh != buffers_to_free);
        }
        return ret;
}
EXPORT_SYMBOL(try_to_free_buffers);
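
/*
 * Illustrative sketch (not part of this file): try_to_free_buffers() is
 * typically the workhorse behind a filesystem's ->releasepage method, which
 * the VM calls when it wants buffers detached so a page can be reclaimed.
 * "myfs_releasepage" is a hypothetical name; a real implementation would
 * usually check journal or writeback state first:
 *
 *      static int myfs_releasepage(struct page *page, gfp_t gfp)
 *      {
 *              return try_to_free_buffers(page);
 *      }
 */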

/*
 * There are no bdflush tunables left.  But distributions are still running
 * obsolete flush daemons, so we terminate them here.
 *
 * Use of bdflush() is deprecated and will be removed in a future kernel.
 * The flusher kernel threads fully replace bdflush and friends.
 */
SYSCALL_DEFINE2(bdflush, int, func, long, data)
{
        static int msg_count;

        if (!capable(CAP_SYS_ADMIN))
                return -EPERM;

        if (msg_count < 5) {
                msg_count++;
                printk(KERN_INFO
                        "warning: process `%s' used the obsolete bdflush"
                        " system call\n", current->comm);
                printk(KERN_INFO "Fix your initscripts?\n");
        }

        if (func == 1)
                do_exit(0);
        return 0;
}

/*
 * Buffer-head allocation
 */
static struct kmem_cache *bh_cachep __read_mostly;

/*
 * Once the number of bh's in the machine exceeds this level, we start
 * stripping them in writeback.
 */
static unsigned long max_buffer_heads;

int buffer_heads_over_limit;

struct bh_accounting {
        int nr;
        int ratelimit;
};

static DEFINE_PER_CPU(struct bh_accounting, bh_accounting) = {0, 0};

static void recalc_bh_state(void)
{
        int i;
        int tot = 0;

        if (__this_cpu_inc_return(bh_accounting.ratelimit) - 1 < 4096)
                return;
        __this_cpu_write(bh_accounting.ratelimit, 0);
        for_each_online_cpu(i)
                tot += per_cpu(bh_accounting, i).nr;
        buffer_heads_over_limit = (tot > max_buffer_heads);
}

struct buffer_head *alloc_buffer_head(gfp_t gfp_flags)
{
        struct buffer_head *ret = kmem_cache_zalloc(bh_cachep, gfp_flags);
        if (ret) {
                INIT_LIST_HEAD(&ret->b_assoc_buffers);
                preempt_disable();
                __this_cpu_inc(bh_accounting.nr);
                recalc_bh_state();
                preempt_enable();
        }
        return ret;
}
EXPORT_SYMBOL(alloc_buffer_head);

void free_buffer_head(struct buffer_head *bh)
{
        BUG_ON(!list_empty(&bh->b_assoc_buffers));
        kmem_cache_free(bh_cachep, bh);
        preempt_disable();
        __this_cpu_dec(bh_accounting.nr);
        recalc_bh_state();
        preempt_enable();
}
EXPORT_SYMBOL(free_buffer_head);

static void buffer_exit_cpu(int cpu)
{
        int i;
        struct bh_lru *b = &per_cpu(bh_lrus, cpu);

        for (i = 0; i < BH_LRU_SIZE; i++) {
                brelse(b->bhs[i]);
                b->bhs[i] = NULL;
        }
        this_cpu_add(bh_accounting.nr, per_cpu(bh_accounting, cpu).nr);
        per_cpu(bh_accounting, cpu).nr = 0;
}

static int buffer_cpu_notify(struct notifier_block *self,
                             unsigned long action, void *hcpu)
{
        if (action == CPU_DEAD || action == CPU_DEAD_FROZEN)
                buffer_exit_cpu((unsigned long)hcpu);
        return NOTIFY_OK;
}

/**
 * bh_uptodate_or_lock - Test whether the buffer is uptodate
 * @bh: struct buffer_head
 *
 * Return true if the buffer is up-to-date and false,
 * with the buffer locked, if not.
 */
int bh_uptodate_or_lock(struct buffer_head *bh)
{
        if (!buffer_uptodate(bh)) {
                lock_buffer(bh);
                if (!buffer_uptodate(bh))
                        return 0;
                unlock_buffer(bh);
        }
        return 1;
}
EXPORT_SYMBOL(bh_uptodate_or_lock);

/**
 * bh_submit_read - Submit a locked buffer for reading
 * @bh: struct buffer_head
 *
 * Returns zero on success and -EIO on error.
 */
int bh_submit_read(struct buffer_head *bh)
{
        BUG_ON(!buffer_locked(bh));

        if (buffer_uptodate(bh)) {
                unlock_buffer(bh);
                return 0;
        }

        get_bh(bh);
        bh->b_end_io = end_buffer_read_sync;
        submit_bh(READ, bh);
        wait_on_buffer(bh);
        if (buffer_uptodate(bh))
                return 0;
        return -EIO;
}
EXPORT_SYMBOL(bh_submit_read);
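
/*
 * Illustrative sketch (not part of this file): bh_uptodate_or_lock() and
 * bh_submit_read() are meant to be used as a pair, so the buffer is read
 * from disk only when it is not already up to date:
 *
 *      if (!bh_uptodate_or_lock(bh)) {
 *              err = bh_submit_read(bh);       (bh is locked at this point)
 *              if (err)
 *                      return err;
 *      }
 *      (bh->b_data is now valid)
 */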

/*
 * Seek for SEEK_DATA / SEEK_HOLE within @page, starting at @lastoff.
 *
 * Returns the offset within the file on success, and -ENOENT otherwise.
 */
static loff_t
page_seek_hole_data(struct page *page, loff_t lastoff, int whence)
{
        loff_t offset = page_offset(page);
        struct buffer_head *bh, *head;
        bool seek_data = whence == SEEK_DATA;

        if (lastoff < offset)
                lastoff = offset;

        bh = head = page_buffers(page);
        do {
                offset += bh->b_size;
                if (lastoff >= offset)
                        continue;

                /*
                 * Unwritten extents that have data in the page cache covering
                 * them can be identified by the BH_Unwritten state flag.
                 * Pages with multiple buffers might have a mix of holes, data
                 * and unwritten extents - any buffer with valid data in it
                 * should have BH_Uptodate flag set on it.
                 */
                if ((buffer_unwritten(bh) || buffer_uptodate(bh)) == seek_data)
                        return lastoff;

                lastoff = offset;
        } while ((bh = bh->b_this_page) != head);
        return -ENOENT;
}

/*
 * Seek for SEEK_DATA / SEEK_HOLE in the page cache.
 *
 * Within unwritten extents, the page cache determines which parts are holes
 * and which are data: uptodate buffer heads count as data; everything else
 * counts as a hole.
 *
 * Returns the resulting offset on success, and -ENOENT otherwise.
 */
loff_t
page_cache_seek_hole_data(struct inode *inode, loff_t offset, loff_t length,
                          int whence)
{
        pgoff_t index = offset >> PAGE_SHIFT;
        pgoff_t end = DIV_ROUND_UP(offset + length, PAGE_SIZE);
        loff_t lastoff = offset;
        struct pagevec pvec;

        if (length <= 0)
                return -ENOENT;

        pagevec_init(&pvec, 0);

        do {
                unsigned want, nr_pages, i;

                want = min_t(unsigned, end - index, PAGEVEC_SIZE);
                nr_pages = pagevec_lookup(&pvec, inode->i_mapping, index, want);
                if (nr_pages == 0)
                        break;

                for (i = 0; i < nr_pages; i++) {
                        struct page *page = pvec.pages[i];

                        /*
                         * At this point, the page may be truncated or
                         * invalidated (changing page->mapping to NULL), or
                         * even swizzled back from swapper_space to tmpfs file
                         * mapping.  However, page->index will not change
                         * because we have a reference on the page.
                         *
                         * If the current page offset is beyond where we've
                         * ended, we've found a hole.
                         */
                        if (whence == SEEK_HOLE &&
                            lastoff < page_offset(page))
                                goto check_range;

                        /* Searching done if the page index is out of range. */
                        if (page->index >= end)
                                goto not_found;
                        lock_page(page);
                        if (likely(page->mapping == inode->i_mapping) &&
                            page_has_buffers(page)) {
                                lastoff = page_seek_hole_data(page, lastoff,
                                                              whence);
                                if (lastoff >= 0) {
                                        unlock_page(page);
                                        goto check_range;
                                }
                        }
                        unlock_page(page);
                        lastoff = page_offset(page) + PAGE_SIZE;
                }

                /* Searching done if fewer pages returned than wanted. */
                if (nr_pages < want)
                        break;

                index = pvec.pages[i - 1]->index + 1;
                pagevec_release(&pvec);
        } while (index < end);

        /* When no page at lastoff and we are not done, we found a hole. */
        if (whence != SEEK_HOLE)
                goto not_found;

check_range:
        if (lastoff < offset + length)
                goto out;
not_found:
        lastoff = -ENOENT;
out:
        pagevec_release(&pvec);
        return lastoff;
}
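
/*
 * Illustrative sketch (not part of this file): a ->llseek implementation
 * could delegate SEEK_HOLE/SEEK_DATA to the helper above.  This consults
 * only the page cache and ignores any on-disk extent information the
 * filesystem may have, so it is a simplified, hypothetical example
 * ("myfs_seek_hole_data" is not a real function):
 *
 *      static loff_t myfs_seek_hole_data(struct file *file, loff_t offset,
 *                                        int whence)
 *      {
 *              struct inode *inode = file_inode(file);
 *              loff_t size = i_size_read(inode);
 *              loff_t ret;
 *
 *              if (offset >= size)
 *                      return -ENXIO;
 *              ret = page_cache_seek_hole_data(inode, offset, size - offset,
 *                                              whence);
 *              if (ret == -ENOENT)
 *                      ret = (whence == SEEK_HOLE) ? size : -ENXIO;
 *              return ret;
 *      }
 */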

void __init buffer_init(void)
{
        unsigned long nrpages;

        bh_cachep = kmem_cache_create("buffer_head",
                        sizeof(struct buffer_head), 0,
                                (SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|
                                SLAB_MEM_SPREAD),
                                NULL);

        /*
         * Limit the bh occupancy to 10% of ZONE_NORMAL
         */
        nrpages = (nr_free_buffer_pages() * 10) / 100;
        max_buffer_heads = nrpages * (PAGE_SIZE / sizeof(struct buffer_head));
        hotcpu_notifier(buffer_cpu_notify, 0);
}