/*
 *  linux/fs/buffer.c
 *
 *  Copyright (C) 1991, 1992, 2002  Linus Torvalds
 */
#include <linux/kernel.h>
#include <linux/syscalls.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/percpu.h>
#include <linux/slab.h>
#include <linux/capability.h>
#include <linux/blkdev.h>
#include <linux/file.h>
#include <linux/quotaops.h>
#include <linux/highmem.h>
#include <linux/export.h>
#include <linux/writeback.h>
#include <linux/hash.h>
#include <linux/suspend.h>
#include <linux/buffer_head.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/bio.h>
#include <linux/notifier.h>
#include <linux/cpu.h>
#include <linux/bitops.h>
#include <linux/mpage.h>
#include <linux/bit_spinlock.h>

static int fsync_buffers_list(spinlock_t *lock, struct list_head *list);

#define BH_ENTRY(list) list_entry((list), struct buffer_head, b_assoc_buffers)

inline void
init_buffer(struct buffer_head *bh, bh_end_io_t *handler, void *private)
{
	bh->b_end_io = handler;
	bh->b_private = private;
}
EXPORT_SYMBOL(init_buffer);

static int sleep_on_buffer(void *word)
{
	io_schedule();
	return 0;
}

void __lock_buffer(struct buffer_head *bh)
{
	wait_on_bit_lock(&bh->b_state, BH_Lock, sleep_on_buffer,
							TASK_UNINTERRUPTIBLE);
}
EXPORT_SYMBOL(__lock_buffer);

void unlock_buffer(struct buffer_head *bh)
{
	clear_bit_unlock(BH_Lock, &bh->b_state);
	smp_mb__after_clear_bit();
	wake_up_bit(&bh->b_state, BH_Lock);
}
EXPORT_SYMBOL(unlock_buffer);

/*
 * Block until a buffer comes unlocked.  This doesn't stop it
 * from becoming locked again - you have to lock it yourself
 * if you want to preserve its state.
 */
void __wait_on_buffer(struct buffer_head *bh)
{
	wait_on_bit(&bh->b_state, BH_Lock, sleep_on_buffer, TASK_UNINTERRUPTIBLE);
}
EXPORT_SYMBOL(__wait_on_buffer);
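
/*
 * Illustrative sketch (not part of the original file): the usual
 * lock-modify-unlock pattern around a buffer_head.  "bh" is assumed to
 * come from a prior __bread()/__getblk() by the caller.
 *
 *	lock_buffer(bh);		// sleeps in __lock_buffer() if contended
 *	... modify bh->b_data ...
 *	mark_buffer_dirty(bh);
 *	unlock_buffer(bh);		// wakes waiters via wake_up_bit()
 *
 * A caller that merely needs in-flight I/O to settle can instead call
 * wait_on_buffer(bh), which ends up in __wait_on_buffer() above.
 */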

static void
__clear_page_buffers(struct page *page)
{
	ClearPagePrivate(page);
	set_page_private(page, 0);
	page_cache_release(page);
}

static int quiet_error(struct buffer_head *bh)
{
	if (!test_bit(BH_Quiet, &bh->b_state) && printk_ratelimit())
		return 0;
	return 1;
}

static void buffer_io_error(struct buffer_head *bh)
{
	char b[BDEVNAME_SIZE];
	printk(KERN_ERR "Buffer I/O error on device %s, logical block %Lu\n",
			bdevname(bh->b_bdev, b),
			(unsigned long long)bh->b_blocknr);
}

/*
 * End-of-IO handler helper function which does not touch the bh after
 * unlocking it.
 * Note: unlock_buffer() sort-of does touch the bh after unlocking it, but
 * a race there is benign: unlock_buffer() only uses the bh's address for
 * hashing after unlocking the buffer, so it doesn't actually touch the bh
 * itself.
 */
static void __end_buffer_read_notouch(struct buffer_head *bh, int uptodate)
{
	if (uptodate) {
		set_buffer_uptodate(bh);
	} else {
		/* This happens, due to failed READA attempts. */
		clear_buffer_uptodate(bh);
	}
	unlock_buffer(bh);
}

/*
 * Default synchronous end-of-IO handler.  Just mark it up-to-date and
 * unlock the buffer.  This is what ll_rw_block uses too.
 */
void end_buffer_read_sync(struct buffer_head *bh, int uptodate)
{
	__end_buffer_read_notouch(bh, uptodate);
	put_bh(bh);
}
EXPORT_SYMBOL(end_buffer_read_sync);

void end_buffer_write_sync(struct buffer_head *bh, int uptodate)
{
	char b[BDEVNAME_SIZE];

	if (uptodate) {
		set_buffer_uptodate(bh);
	} else {
		if (!quiet_error(bh)) {
			buffer_io_error(bh);
			printk(KERN_WARNING "lost page write due to "
					"I/O error on %s\n",
				       bdevname(bh->b_bdev, b));
		}
		set_buffer_write_io_error(bh);
		clear_buffer_uptodate(bh);
	}
	unlock_buffer(bh);
	put_bh(bh);
}
EXPORT_SYMBOL(end_buffer_write_sync);

/*
 * Various filesystems appear to want __find_get_block to be non-blocking.
 * But it's the page lock which protects the buffers.  To get around this,
 * we get exclusion from try_to_free_buffers with the blockdev mapping's
 * private_lock.
 */
static struct buffer_head *
__find_get_block_slow(struct block_device *bdev, sector_t block)
{
	struct inode *bd_inode = bdev->bd_inode;
	struct address_space *bd_mapping = bd_inode->i_mapping;
	struct buffer_head *ret = NULL;
	pgoff_t index;
	struct buffer_head *bh;
	struct buffer_head *head;
	struct page *page;
	int all_mapped = 1;

	index = block >> (PAGE_CACHE_SHIFT - bd_inode->i_blkbits);
	page = find_get_page(bd_mapping, index);
	if (!page)
		goto out;

	spin_lock(&bd_mapping->private_lock);
	if (!page_has_buffers(page))
		goto out_unlock;
	head = page_buffers(page);
	bh = head;
	do {
		if (!buffer_mapped(bh))
			all_mapped = 0;
		else if (bh->b_blocknr == block) {
			ret = bh;
			get_bh(bh);
			goto out_unlock;
		}
		bh = bh->b_this_page;
	} while (bh != head);

	/* we might be here because some of the buffers on this page are
	 * not mapped.  This is due to various races between
	 * file io on the block device and getblk.  It gets dealt with
	 * elsewhere, don't buffer_error if we had some unmapped buffers
	 */
	if (all_mapped) {
		char b[BDEVNAME_SIZE];

		printk("__find_get_block_slow() failed. "
			"block=%llu, b_blocknr=%llu\n",
			(unsigned long long)block,
			(unsigned long long)bh->b_blocknr);
		printk("b_state=0x%08lx, b_size=%zu\n",
			bh->b_state, bh->b_size);
		printk("device %s blocksize: %d\n", bdevname(bdev, b),
			1 << bd_inode->i_blkbits);
	}
out_unlock:
	spin_unlock(&bd_mapping->private_lock);
	page_cache_release(page);
out:
	return ret;
}

/*
 * Kick the writeback threads then try to free up some ZONE_NORMAL memory.
 */
static void free_more_memory(void)
{
	struct zone *zone;
	int nid;

	wakeup_flusher_threads(1024, WB_REASON_FREE_MORE_MEM);
	yield();

	for_each_online_node(nid) {
		(void)first_zones_zonelist(node_zonelist(nid, GFP_NOFS),
						gfp_zone(GFP_NOFS), NULL,
						&zone);
		if (zone)
			try_to_free_pages(node_zonelist(nid, GFP_NOFS), 0,
						GFP_NOFS, NULL);
	}
}

/*
 * I/O completion handler for block_read_full_page() - pages
 * which come unlocked at the end of I/O.
 */
static void end_buffer_async_read(struct buffer_head *bh, int uptodate)
{
	unsigned long flags;
	struct buffer_head *first;
	struct buffer_head *tmp;
	struct page *page;
	int page_uptodate = 1;

	BUG_ON(!buffer_async_read(bh));

	page = bh->b_page;
	if (uptodate) {
		set_buffer_uptodate(bh);
	} else {
		clear_buffer_uptodate(bh);
		if (!quiet_error(bh))
			buffer_io_error(bh);
		SetPageError(page);
	}

	/*
	 * Be _very_ careful from here on. Bad things can happen if
	 * two buffer heads end IO at almost the same time and both
	 * decide that the page is now completely done.
	 */
	first = page_buffers(page);
	local_irq_save(flags);
	bit_spin_lock(BH_Uptodate_Lock, &first->b_state);
	clear_buffer_async_read(bh);
	unlock_buffer(bh);
	tmp = bh;
	do {
		if (!buffer_uptodate(tmp))
			page_uptodate = 0;
		if (buffer_async_read(tmp)) {
			BUG_ON(!buffer_locked(tmp));
			goto still_busy;
		}
		tmp = tmp->b_this_page;
	} while (tmp != bh);
	bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
	local_irq_restore(flags);

	/*
	 * If none of the buffers had errors and they are all
	 * uptodate then we can set the page uptodate.
	 */
	if (page_uptodate && !PageError(page))
		SetPageUptodate(page);
	unlock_page(page);
	return;

still_busy:
	bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
	local_irq_restore(flags);
	return;
}

/*
 * Completion handler for block_write_full_page() - pages which are unlocked
 * during I/O, and which have PageWriteback cleared upon I/O completion.
 */
void end_buffer_async_write(struct buffer_head *bh, int uptodate)
{
	char b[BDEVNAME_SIZE];
	unsigned long flags;
	struct buffer_head *first;
	struct buffer_head *tmp;
	struct page *page;

	BUG_ON(!buffer_async_write(bh));

	page = bh->b_page;
	if (uptodate) {
		set_buffer_uptodate(bh);
	} else {
		if (!quiet_error(bh)) {
			buffer_io_error(bh);
			printk(KERN_WARNING "lost page write due to "
					"I/O error on %s\n",
			       bdevname(bh->b_bdev, b));
		}
		set_bit(AS_EIO, &page->mapping->flags);
		set_buffer_write_io_error(bh);
		clear_buffer_uptodate(bh);
		SetPageError(page);
	}

	first = page_buffers(page);
	local_irq_save(flags);
	bit_spin_lock(BH_Uptodate_Lock, &first->b_state);

	clear_buffer_async_write(bh);
	unlock_buffer(bh);
	tmp = bh->b_this_page;
	while (tmp != bh) {
		if (buffer_async_write(tmp)) {
			BUG_ON(!buffer_locked(tmp));
			goto still_busy;
		}
		tmp = tmp->b_this_page;
	}
	bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
	local_irq_restore(flags);
	end_page_writeback(page);
	return;

still_busy:
	bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
	local_irq_restore(flags);
	return;
}
EXPORT_SYMBOL(end_buffer_async_write);

/*
 * mark_buffer_async_read() and mark_buffer_async_write() install the
 * end_io handlers used by the async completion paths above.  The buffer
 * must already be locked; the async flag tells the completion handler
 * that page-level state (PG_locked / PG_writeback) is only finalised
 * once every async buffer on the page has completed.
 */
static void mark_buffer_async_read(struct buffer_head *bh)
{
	bh->b_end_io = end_buffer_async_read;
	set_buffer_async_read(bh);
}

static void mark_buffer_async_write_endio(struct buffer_head *bh,
					  bh_end_io_t *handler)
{
	bh->b_end_io = handler;
	set_buffer_async_write(bh);
}

void mark_buffer_async_write(struct buffer_head *bh)
{
	mark_buffer_async_write_endio(bh, end_buffer_async_write);
}
EXPORT_SYMBOL(mark_buffer_async_write);

/*
 * Filesystems which back inode metadata with buffers on a different
 * mapping (typically the blockdev mapping) link those "associated"
 * buffers onto the inode's mapping->private_list so that fsync() can
 * find and wait on them.  The list is protected by the *backing*
 * mapping's private_lock; mapping->assoc_mapping points at that backing
 * mapping, and b_assoc_map records which private_list a buffer sits on.
 */
static void __remove_assoc_queue(struct buffer_head *bh)
{
	list_del_init(&bh->b_assoc_buffers);
	WARN_ON(!bh->b_assoc_map);
	if (buffer_write_io_error(bh))
		set_bit(AS_EIO, &bh->b_assoc_map->flags);
	bh->b_assoc_map = NULL;
}

int inode_has_buffers(struct inode *inode)
{
	return !list_empty(&inode->i_data.private_list);
}

/*
 * osync is designed to support O_SYNC io.  It waits synchronously for
 * all already-submitted IO to complete, but does not queue any new
 * writes to the disk.
 *
 * To do O_SYNC writes, just queue the buffer writes with ll_rw_block as
 * you dirty the buffers, and then use osync_buffers_list to wait for
 * completion.  Any other dirty buffers which are not yet queued for
 * write will not be flushed to disk by the osync.
 */
static int osync_buffers_list(spinlock_t *lock, struct list_head *list)
{
	struct buffer_head *bh;
	struct list_head *p;
	int err = 0;

	spin_lock(lock);
repeat:
	list_for_each_prev(p, list) {
		bh = BH_ENTRY(p);
		if (buffer_locked(bh)) {
			get_bh(bh);
			spin_unlock(lock);
			wait_on_buffer(bh);
			if (!buffer_uptodate(bh))
				err = -EIO;
			brelse(bh);
			spin_lock(lock);
			goto repeat;
		}
	}
	spin_unlock(lock);
	return err;
}

static void do_thaw_one(struct super_block *sb, void *unused)
{
	char b[BDEVNAME_SIZE];
	while (sb->s_bdev && !thaw_bdev(sb->s_bdev, sb))
		printk(KERN_WARNING "Emergency Thaw on %s\n",
		       bdevname(sb->s_bdev, b));
}

static void do_thaw_all(struct work_struct *work)
{
	iterate_supers(do_thaw_one, NULL);
	kfree(work);
	printk(KERN_WARNING "Emergency Thaw complete\n");
}

/**
 * emergency_thaw_all -- forcibly thaw every frozen filesystem
 *
 * Used for emergency unfreeze of all filesystems via SysRq
 */
void emergency_thaw_all(void)
{
	struct work_struct *work;

	work = kmalloc(sizeof(*work), GFP_ATOMIC);
	if (work) {
		INIT_WORK(work, do_thaw_all);
		schedule_work(work);
	}
}

/**
 * sync_mapping_buffers - write out & wait upon a mapping's "associated" buffers
 * @mapping: the mapping which wants those buffers written
 *
 * Starts I/O against the buffers at mapping->private_list, and waits upon
 * that list.  Basically, this is a convenience function for fsync():
 * @mapping is a file or directory which needs those buffers to be written
 * for a successful fsync().
 */
int sync_mapping_buffers(struct address_space *mapping)
{
	struct address_space *buffer_mapping = mapping->assoc_mapping;

	if (buffer_mapping == NULL || list_empty(&mapping->private_list))
		return 0;

	return fsync_buffers_list(&buffer_mapping->private_lock,
					&mapping->private_list);
}
EXPORT_SYMBOL(sync_mapping_buffers);
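
/*
 * Illustrative sketch (not part of the original file): how a simple
 * filesystem's ->fsync() can use sync_mapping_buffers().  "myfs_fsync"
 * is a hypothetical name; generic_file_fsync() in fs/libfs.c follows
 * essentially this shape.
 *
 *	static int myfs_fsync(struct file *file, loff_t start, loff_t end,
 *			      int datasync)
 *	{
 *		struct inode *inode = file->f_mapping->host;
 *		int err, ret;
 *
 *		ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
 *		// flush metadata queued on i_mapping->private_list:
 *		err = sync_mapping_buffers(inode->i_mapping);
 *		if (!ret)
 *			ret = err;
 *		return ret;
 *	}
 */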

/*
 * Called when we've recently written block `bblock', and it is known that
 * `bblock' was for a buffer_boundary() buffer.  This means that the block at
 * `bblock + 1' is probably a dirty indirect block.  Hunt it down and, if it's
 * dirty, schedule it for IO.  So that indirects merge nicely with their data.
 */
void write_boundary_block(struct block_device *bdev,
			sector_t bblock, unsigned blocksize)
{
	struct buffer_head *bh = __find_get_block(bdev, bblock + 1, blocksize);
	if (bh) {
		if (buffer_dirty(bh))
			ll_rw_block(WRITE, 1, &bh);
		put_bh(bh);
	}
}

void mark_buffer_dirty_inode(struct buffer_head *bh, struct inode *inode)
{
	struct address_space *mapping = inode->i_mapping;
	struct address_space *buffer_mapping = bh->b_page->mapping;

	mark_buffer_dirty(bh);
	if (!mapping->assoc_mapping) {
		mapping->assoc_mapping = buffer_mapping;
	} else {
		BUG_ON(mapping->assoc_mapping != buffer_mapping);
	}
	if (!bh->b_assoc_map) {
		spin_lock(&buffer_mapping->private_lock);
		list_move_tail(&bh->b_assoc_buffers,
				&mapping->private_list);
		bh->b_assoc_map = mapping;
		spin_unlock(&buffer_mapping->private_lock);
	}
}
EXPORT_SYMBOL(mark_buffer_dirty_inode);

/*
 * Mark the page dirty, and set it dirty in the radix tree, and mark the inode
 * dirty.
 *
 * If warn is true, then emit a warning if the page is not uptodate and has
 * not been truncated.
 */
static void __set_page_dirty(struct page *page,
		struct address_space *mapping, int warn)
{
	spin_lock_irq(&mapping->tree_lock);
	if (page->mapping) {	/* Race with truncate? */
		WARN_ON_ONCE(warn && !PageUptodate(page));
		account_page_dirtied(page, mapping);
		radix_tree_tag_set(&mapping->page_tree,
				page_index(page), PAGECACHE_TAG_DIRTY);
	}
	spin_unlock_irq(&mapping->tree_lock);
	__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
}

/*
 * Add a page to the dirty page list.
 *
 * It is a sad fact of life that this function is called from several places
 * deeply under spinlocking.  It may not sleep.
 *
 * If the page has buffers, the uptodate buffers are set dirty, to preserve
 * dirty-state coherency between the page and the buffers.  Buffers added to
 * a dirty page are created dirty.
 *
 * The buffers are dirtied before the page is dirtied.  If this code were to
 * set the page dirty before the buffers, a concurrent writepage could clean
 * the page and miss the newly-dirty buffers.
 *
 * private_lock serialises this against try_to_free_buffers().
 */
int __set_page_dirty_buffers(struct page *page)
{
	int newly_dirty;
	struct address_space *mapping = page_mapping(page);

	if (unlikely(!mapping))
		return !TestSetPageDirty(page);

	spin_lock(&mapping->private_lock);
	if (page_has_buffers(page)) {
		struct buffer_head *head = page_buffers(page);
		struct buffer_head *bh = head;

		do {
			set_buffer_dirty(bh);
			bh = bh->b_this_page;
		} while (bh != head);
	}
	newly_dirty = !TestSetPageDirty(page);
	spin_unlock(&mapping->private_lock);

	if (newly_dirty)
		__set_page_dirty(page, mapping, 1);
	return newly_dirty;
}
EXPORT_SYMBOL(__set_page_dirty_buffers);
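
/*
 * Illustrative note (not part of the original file): with CONFIG_BLOCK,
 * set_page_dirty() falls back to __set_page_dirty_buffers() when an
 * address_space has no ->set_page_dirty of its own, and buffer-backed
 * filesystems may also plug it in explicitly:
 *
 *	static const struct address_space_operations myfs_aops = {
 *		.set_page_dirty	= __set_page_dirty_buffers,
 *		...
 *	};
 *
 * ("myfs_aops" is a hypothetical name.)
 */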

/*
 * Write out and wait upon a list of buffers.
 *
 * We have conflicting pressures: we want to make sure that all
 * initially dirty buffers get waited on, but that any subsequently
 * dirtied buffers don't.  After all, we don't want fsync to last
 * forever if somebody is actively writing to the file.
 *
 * Do this in two main stages: first we copy dirty buffers to a
 * temporary inode list, queueing the writes as we go.  Then we clean
 * up, waiting for those writes to complete.  During this second stage,
 * any subsequent dirtyings will just put the buffer back on the main
 * dirty list, so they'll be picked up by a later fsync.
 */
static int fsync_buffers_list(spinlock_t *lock, struct list_head *list)
{
	struct buffer_head *bh;
	struct list_head tmp;
	struct address_space *mapping;
	int err = 0, err2;
	struct blk_plug plug;

	INIT_LIST_HEAD(&tmp);
	blk_start_plug(&plug);

	spin_lock(lock);
	while (!list_empty(list)) {
		bh = BH_ENTRY(list->next);
		mapping = bh->b_assoc_map;
		__remove_assoc_queue(bh);
		/* Avoid race with mark_buffer_dirty_inode() which does
		 * a lockless check and we rely on seeing the dirty bit */
		smp_mb();
		if (buffer_dirty(bh) || buffer_locked(bh)) {
			list_add(&bh->b_assoc_buffers, &tmp);
			bh->b_assoc_map = mapping;
			if (buffer_dirty(bh)) {
				get_bh(bh);
				spin_unlock(lock);
				/*
				 * Ensure any pending I/O completes so that
				 * write_dirty_buffer() actually writes the
				 * current contents - it is a noop if I/O is
				 * still in flight on potentially older
				 * contents.
				 */
				write_dirty_buffer(bh, WRITE_SYNC);
				brelse(bh);
				spin_lock(lock);
			}
		}
	}

	spin_unlock(lock);
	blk_finish_plug(&plug);
	spin_lock(lock);

	while (!list_empty(&tmp)) {
		bh = BH_ENTRY(tmp.prev);
		get_bh(bh);
		mapping = bh->b_assoc_map;
		__remove_assoc_queue(bh);
		/* Avoid race with mark_buffer_dirty_inode() which does
		 * a lockless check and we rely on seeing the dirty bit */
		smp_mb();
		if (buffer_dirty(bh)) {
			list_add(&bh->b_assoc_buffers,
				 &mapping->private_list);
			bh->b_assoc_map = mapping;
		}
		spin_unlock(lock);
		wait_on_buffer(bh);
		if (!buffer_uptodate(bh))
			err = -EIO;
		brelse(bh);
		spin_lock(lock);
	}

	spin_unlock(lock);
	err2 = osync_buffers_list(lock, list);
	if (err)
		return err;
	else
		return err2;
}

/*
 * Invalidate any and all dirty buffers on a given inode.  We are
 * probably unmounting the fs, but that doesn't mean we have already
 * done a sync().  Just drop the buffers from the inode list.
 *
 * NOTE: we take the inode's blockdev's mapping's private_lock.  Which
 * assumes that all the buffers are against the blockdev.
 */
void invalidate_inode_buffers(struct inode *inode)
{
	if (inode_has_buffers(inode)) {
		struct address_space *mapping = &inode->i_data;
		struct list_head *list = &mapping->private_list;
		struct address_space *buffer_mapping = mapping->assoc_mapping;

		spin_lock(&buffer_mapping->private_lock);
		while (!list_empty(list))
			__remove_assoc_queue(BH_ENTRY(list->next));
		spin_unlock(&buffer_mapping->private_lock);
	}
}
EXPORT_SYMBOL(invalidate_inode_buffers);

/*
 * Remove any clean buffers from the inode's buffer list.  This is called
 * when we're trying to free the inode itself.  Those buffers can pin it.
 *
 * Returns true if all buffers were removed.
 */
int remove_inode_buffers(struct inode *inode)
{
	int ret = 1;

	if (inode_has_buffers(inode)) {
		struct address_space *mapping = &inode->i_data;
		struct list_head *list = &mapping->private_list;
		struct address_space *buffer_mapping = mapping->assoc_mapping;

		spin_lock(&buffer_mapping->private_lock);
		while (!list_empty(list)) {
			struct buffer_head *bh = BH_ENTRY(list->next);
			if (buffer_dirty(bh)) {
				ret = 0;
				break;
			}
			__remove_assoc_queue(bh);
		}
		spin_unlock(&buffer_mapping->private_lock);
	}
	return ret;
}

/*
 * Create the appropriate buffers when given a page for data area and
 * the size of each buffer.  Use the bh->b_this_page linked list to
 * follow the buffers created.  Return NULL if unable to create more
 * buffers.
 *
 * The retry flag is used to differentiate async IO (paging, swapping)
 * which may not fail from ordinary buffer allocations.
 */
struct buffer_head *alloc_page_buffers(struct page *page, unsigned long size,
		int retry)
{
	struct buffer_head *bh, *head;
	long offset;

try_again:
	head = NULL;
	offset = PAGE_SIZE;
	while ((offset -= size) >= 0) {
		bh = alloc_buffer_head(GFP_NOFS);
		if (!bh)
			goto no_grow;

		bh->b_bdev = NULL;
		bh->b_this_page = head;
		bh->b_blocknr = -1;
		head = bh;

		bh->b_state = 0;
		atomic_set(&bh->b_count, 0);
		bh->b_size = size;

		/* Link the buffer to its page */
		set_bh_page(bh, page, offset);

		init_buffer(bh, NULL, NULL);
	}
	return head;
/*
 * In case anything failed, we just free everything we got.
 */
no_grow:
	if (head) {
		do {
			bh = head;
			head = head->b_this_page;
			free_buffer_head(bh);
		} while (head);
	}

	/*
	 * Return failure for non-async IO requests.  Async IO requests
	 * are not allowed to fail, so we have to wait until buffer heads
	 * become available.  But we don't want tasks sleeping with
	 * partially complete buffers, so all were released above.
	 */
	if (!retry)
		return NULL;

	/* We're _really_ low on memory. Now we just
	 * wait for old buffer heads to become free due to
	 * finishing IO.  Since this is an async request and
	 * the reserve list is empty, we're sure there are
	 * async buffer heads in use.
	 */
	free_more_memory();
	goto try_again;
}
EXPORT_SYMBOL_GPL(alloc_page_buffers);

static inline void
link_dev_buffers(struct page *page, struct buffer_head *head)
{
	struct buffer_head *bh, *tail;

	bh = head;
	do {
		tail = bh;
		bh = bh->b_this_page;
	} while (bh);
	tail->b_this_page = head;
	attach_page_buffers(page, head);
}

static sector_t blkdev_max_block(struct block_device *bdev, unsigned int size)
{
	sector_t retval = ~((sector_t)0);
	loff_t sz = i_size_read(bdev->bd_inode);

	if (sz) {
		unsigned int sizebits = blksize_bits(size);
		retval = (sz >> sizebits);
	}
	return retval;
}

/*
 * Initialise the state of a blockdev page's buffers.
 */
static sector_t
init_page_buffers(struct page *page, struct block_device *bdev,
			sector_t block, int size)
{
	struct buffer_head *head = page_buffers(page);
	struct buffer_head *bh = head;
	int uptodate = PageUptodate(page);
	sector_t end_block = blkdev_max_block(I_BDEV(bdev->bd_inode), size);

	do {
		if (!buffer_mapped(bh)) {
			init_buffer(bh, NULL, NULL);
			bh->b_bdev = bdev;
			bh->b_blocknr = block;
			if (uptodate)
				set_buffer_uptodate(bh);
			if (block < end_block)
				set_buffer_mapped(bh);
		}
		block++;
		bh = bh->b_this_page;
	} while (bh != head);

	/*
	 * Caller needs to validate requested block against end of device.
	 */
	return end_block;
}

/*
 * Create the page-cache page that contains the requested block.
 *
 * This is used purely for blockdev mappings.
 */
static int
grow_dev_page(struct block_device *bdev, sector_t block,
		pgoff_t index, int size, int sizebits)
{
	struct inode *inode = bdev->bd_inode;
	struct page *page;
	struct buffer_head *bh;
	sector_t end_block;
	int ret = 0;		/* Will call free_more_memory() */

	page = find_or_create_page(inode->i_mapping, index,
		(mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS)|__GFP_MOVABLE);
	if (!page)
		return ret;

	BUG_ON(!PageLocked(page));

	if (page_has_buffers(page)) {
		bh = page_buffers(page);
		if (bh->b_size == size) {
			end_block = init_page_buffers(page, bdev,
						index << sizebits, size);
			goto done;
		}
		if (!try_to_free_buffers(page))
			goto failed;
	}

	/*
	 * Allocate some buffers for this page
	 */
	bh = alloc_page_buffers(page, size, 0);
	if (!bh)
		goto failed;

	/*
	 * Link the page to the buffers and initialise them.  Take the
	 * lock to be atomic wrt __find_get_block(), which does not
	 * run under the page lock.
	 */
	spin_lock(&inode->i_mapping->private_lock);
	link_dev_buffers(page, bh);
	end_block = init_page_buffers(page, bdev, index << sizebits, size);
	spin_unlock(&inode->i_mapping->private_lock);
done:
	ret = (block < end_block) ? 1 : -ENXIO;
failed:
	unlock_page(page);
	page_cache_release(page);
	return ret;
}

/*
 * Create buffers for the specified block device block's page.  If
 * that page was dirty, the buffers are set dirty also.
 */
static int
grow_buffers(struct block_device *bdev, sector_t block, int size)
{
	pgoff_t index;
	int sizebits;

	sizebits = -1;
	do {
		sizebits++;
	} while ((size << sizebits) < PAGE_SIZE);

	index = block >> sizebits;

	/*
	 * Check for a block which wants to lie outside our maximum possible
	 * pagecache index.  (this comparison is done using sector_t types).
	 */
	if (unlikely(index != block >> sizebits)) {
		char b[BDEVNAME_SIZE];

		printk(KERN_ERR "%s: requested out-of-range block %llu for "
			"device %s\n",
			__func__, (unsigned long long)block,
			bdevname(bdev, b));
		return -EIO;
	}

	/* Create a page with the proper size buffers.. */
	return grow_dev_page(bdev, block, index, size, sizebits);
}

static struct buffer_head *
__getblk_slow(struct block_device *bdev, sector_t block, int size)
{
	/* Size must be multiple of hard sectorsize */
	if (unlikely(size & (bdev_logical_block_size(bdev)-1) ||
			(size < 512 || size > PAGE_SIZE))) {
		printk(KERN_ERR "getblk(): invalid block size %d requested\n",
					size);
		printk(KERN_ERR "logical block size: %d\n",
					bdev_logical_block_size(bdev));

		dump_stack();
		return NULL;
	}

	for (;;) {
		struct buffer_head *bh;
		int ret;

		bh = __find_get_block(bdev, block, size);
		if (bh)
			return bh;

		ret = grow_buffers(bdev, block, size);
		if (ret < 0)
			return NULL;
		if (ret == 0)
			free_more_memory();
	}
}

/*
 * The relationship between dirty buffers and dirty pages:
 *
 * Whenever a page has any dirty buffers, the page's dirty bit is set, and
 * the page is tagged dirty in its radix tree.
 *
 * At all times, the dirtiness of the buffers represents the dirtiness of
 * subsections of the page.  If the page has buffers, the page dirty bit is
 * merely a hint about the true dirty state.
 *
 * When a page is set dirty in its entirety, all its buffers are marked dirty
 * (if the page has buffers).
 *
 * When a buffer is marked dirty, its page is dirtied, but the page's other
 * buffers are not.
 *
 * Also.  When blockdev buffers are explicitly read with bread(), they
 * individually become uptodate.  But their backing page remains not
 * uptodate - even if all of its buffers are uptodate.  A subsequent
 * block_read_full_page() against that page will discover all the uptodate
 * buffers, will set the page uptodate and will perform no I/O.
 */

/**
 * mark_buffer_dirty - mark a buffer_head as needing writeout
 * @bh: the buffer_head to mark dirty
 *
 * mark_buffer_dirty() will set the dirty bit against the buffer, then set its
 * backing page dirty, then tag the page as dirty in its address_space's radix
 * tree and then attach the address_space's inode to its superblock's dirty
 * inode list.
 *
 * mark_buffer_dirty() is atomic.  It takes bh->b_page->mapping->private_lock,
 * mapping->tree_lock and mapping->host->i_lock.
 */
void mark_buffer_dirty(struct buffer_head *bh)
{
	WARN_ON_ONCE(!buffer_uptodate(bh));

	/*
	 * Very *carefully* optimize the it-is-already-dirty case.
	 *
	 * Don't let the final "is it dirty" escape to before we
	 * perhaps modified the buffer.
	 */
	if (buffer_dirty(bh)) {
		smp_mb();
		if (buffer_dirty(bh))
			return;
	}

	if (!test_set_buffer_dirty(bh)) {
		struct page *page = bh->b_page;
		if (!TestSetPageDirty(page)) {
			struct address_space *mapping = page_mapping(page);
			if (mapping)
				__set_page_dirty(page, mapping, 0);
		}
	}
}
EXPORT_SYMBOL(mark_buffer_dirty);

/*
 * Decrement a buffer_head's reference count.  If all buffers against a page
 * have zero reference count, are clean and unlocked, and if the page is clean
 * and unlocked then try_to_free_buffers() may strip the buffers from the page
 * in preparation for freeing it (sometimes, rarely, buffers are removed from
 * a page but it ends up not being freed, and buffers may later be reattached).
 */
void __brelse(struct buffer_head *buf)
{
	if (atomic_read(&buf->b_count)) {
		put_bh(buf);
		return;
	}
	WARN(1, KERN_ERR "VFS: brelse: Trying to free free buffer\n");
}
EXPORT_SYMBOL(__brelse);

/*
 * bforget() is like brelse(), except it discards any
 * potentially dirty data.
 */
void __bforget(struct buffer_head *bh)
{
	clear_buffer_dirty(bh);
	if (bh->b_assoc_map) {
		struct address_space *buffer_mapping = bh->b_page->mapping;

		spin_lock(&buffer_mapping->private_lock);
		list_del_init(&bh->b_assoc_buffers);
		bh->b_assoc_map = NULL;
		spin_unlock(&buffer_mapping->private_lock);
	}
	__brelse(bh);
}
EXPORT_SYMBOL(__bforget);

static struct buffer_head *__bread_slow(struct buffer_head *bh)
{
	lock_buffer(bh);
	if (buffer_uptodate(bh)) {
		unlock_buffer(bh);
		return bh;
	} else {
		get_bh(bh);
		bh->b_end_io = end_buffer_read_sync;
		submit_bh(READ, bh);
		wait_on_buffer(bh);
		if (buffer_uptodate(bh))
			return bh;
	}
	brelse(bh);
	return NULL;
}

/*
 * Per-cpu buffer LRU implementation.  To reduce the cost of __find_get_block().
 * The bhs[] array is sorted - newest buffer is at bhs[0].  Buffers have their
 * refcount elevated by one when they're in an LRU.  A buffer can only appear
 * once in a particular CPU's LRU.  A single buffer can be present in multiple
 * CPU's LRUs at the same time.
 *
 * This is a transparent caching front-end to sb_bread(), sb_getblk() and
 * sb_find_get_block().
 *
 * The LRUs themselves only need locking against invalidate_bh_lrus.  We use
 * a local interrupt disable for that.
 */

#define BH_LRU_SIZE	8

struct bh_lru {
	struct buffer_head *bhs[BH_LRU_SIZE];
};

static DEFINE_PER_CPU(struct bh_lru, bh_lrus) = {{ NULL }};

#ifdef CONFIG_SMP
#define bh_lru_lock()	local_irq_disable()
#define bh_lru_unlock()	local_irq_enable()
#else
#define bh_lru_lock()	preempt_disable()
#define bh_lru_unlock()	preempt_enable()
#endif

static inline void check_irqs_on(void)
{
#ifdef irqs_disabled
	BUG_ON(irqs_disabled());
#endif
}

/*
 * The LRU management algorithm is dopey-but-simple.  Sorry.
 */
static void bh_lru_install(struct buffer_head *bh)
{
	struct buffer_head *evictee = NULL;

	check_irqs_on();
	bh_lru_lock();
	if (__this_cpu_read(bh_lrus.bhs[0]) != bh) {
		struct buffer_head *bhs[BH_LRU_SIZE];
		int in;
		int out = 0;

		get_bh(bh);
		bhs[out++] = bh;
		for (in = 0; in < BH_LRU_SIZE; in++) {
			struct buffer_head *bh2 =
				__this_cpu_read(bh_lrus.bhs[in]);

			if (bh2 == bh) {
				__brelse(bh2);
			} else {
				if (out >= BH_LRU_SIZE) {
					BUG_ON(evictee != NULL);
					evictee = bh2;
				} else {
					bhs[out++] = bh2;
				}
			}
		}
		while (out < BH_LRU_SIZE)
			bhs[out++] = NULL;
		memcpy(__this_cpu_ptr(&bh_lrus.bhs), bhs, sizeof(bhs));
	}
	bh_lru_unlock();

	if (evictee)
		__brelse(evictee);
}

/*
 * Look up the bh in this cpu's LRU.  If it's there, move it to the head.
 */
static struct buffer_head *
lookup_bh_lru(struct block_device *bdev, sector_t block, unsigned size)
{
	struct buffer_head *ret = NULL;
	unsigned int i;

	check_irqs_on();
	bh_lru_lock();
	for (i = 0; i < BH_LRU_SIZE; i++) {
		struct buffer_head *bh = __this_cpu_read(bh_lrus.bhs[i]);

		if (bh && bh->b_bdev == bdev &&
				bh->b_blocknr == block && bh->b_size == size) {
			if (i) {
				while (i) {
					__this_cpu_write(bh_lrus.bhs[i],
						__this_cpu_read(bh_lrus.bhs[i - 1]));
					i--;
				}
				__this_cpu_write(bh_lrus.bhs[0], bh);
			}
			get_bh(bh);
			ret = bh;
			break;
		}
	}
	bh_lru_unlock();
	return ret;
}

/*
 * Perform a pagecache lookup for the matching buffer.  If it's there, refresh
 * it in the LRU and mark it as accessed.  If it is not present then return
 * NULL.
 */
struct buffer_head *
__find_get_block(struct block_device *bdev, sector_t block, unsigned size)
{
	struct buffer_head *bh = lookup_bh_lru(bdev, block, size);

	if (bh == NULL) {
		bh = __find_get_block_slow(bdev, block);
		if (bh)
			bh_lru_install(bh);
	}
	if (bh)
		touch_buffer(bh);
	return bh;
}
EXPORT_SYMBOL(__find_get_block);

/*
 * __getblk will locate (and, if necessary, create) the buffer_head
 * which corresponds to the passed block_device, block and size. The
 * returned buffer has its reference count incremented.
 *
 * __getblk() will lock up the machine if grow_dev_page's try_to_free_buffers()
 * attempt is failing.  FIXME, perhaps?
 */
struct buffer_head *
__getblk(struct block_device *bdev, sector_t block, unsigned size)
{
	struct buffer_head *bh = __find_get_block(bdev, block, size);

	might_sleep();
	if (bh == NULL)
		bh = __getblk_slow(bdev, block, size);
	return bh;
}
EXPORT_SYMBOL(__getblk);
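
/*
 * Illustrative sketch (not part of the original file): __getblk() is the
 * right call when the caller will overwrite the whole block and so does
 * not need to read it first.
 *
 *	struct buffer_head *bh = __getblk(bdev, blocknr, blocksize);
 *	if (!bh)
 *		return -ENOMEM;
 *	lock_buffer(bh);
 *	memset(bh->b_data, 0, bh->b_size);	// fill with new contents
 *	set_buffer_uptodate(bh);
 *	mark_buffer_dirty(bh);
 *	unlock_buffer(bh);
 *	brelse(bh);
 */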

/*
 * Do async read-ahead on a buffer..
 */
void __breadahead(struct block_device *bdev, sector_t block, unsigned size)
{
	struct buffer_head *bh = __getblk(bdev, block, size);
	if (likely(bh)) {
		ll_rw_block(READA, 1, &bh);
		brelse(bh);
	}
}
EXPORT_SYMBOL(__breadahead);

/**
 *  __bread() - reads a specified block and returns the bh
 *  @bdev: the block_device to read from
 *  @block: number of block
 *  @size: size (in bytes) to read
 *
 *  Reads a specified block, and returns buffer head that contains it.
 *  It returns NULL if the block was unreadable.
 */
struct buffer_head *
__bread(struct block_device *bdev, sector_t block, unsigned size)
{
	struct buffer_head *bh = __getblk(bdev, block, size);

	if (likely(bh) && !buffer_uptodate(bh))
		bh = __bread_slow(bh);
	return bh;
}
EXPORT_SYMBOL(__bread);
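
/*
 * Illustrative sketch (not part of the original file): reading an
 * on-disk structure with __bread().  "sb", "MYFS_SUPER_BLOCK" and
 * "my_super" are hypothetical; mount code in simple filesystems does
 * essentially this.
 *
 *	struct buffer_head *bh;
 *
 *	bh = __bread(sb->s_bdev, MYFS_SUPER_BLOCK, sb->s_blocksize);
 *	if (!bh)
 *		return -EIO;		// unreadable block
 *	memcpy(&my_super, bh->b_data, sizeof(my_super));
 *	brelse(bh);			// drop the reference when done
 */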

/*
 * invalidate_bh_lrus() is called rarely - but not only at unmount.
 * This doesn't race because it runs in each cpu either in irq
 * or with preempt disabled.
 */
static void invalidate_bh_lru(void *arg)
{
	struct bh_lru *b = &get_cpu_var(bh_lrus);
	int i;

	for (i = 0; i < BH_LRU_SIZE; i++) {
		brelse(b->bhs[i]);
		b->bhs[i] = NULL;
	}
	put_cpu_var(bh_lrus);
}

static bool has_bh_in_lru(int cpu, void *dummy)
{
	struct bh_lru *b = per_cpu_ptr(&bh_lrus, cpu);
	int i;

	for (i = 0; i < BH_LRU_SIZE; i++) {
		if (b->bhs[i])
			return 1;
	}

	return 0;
}

void invalidate_bh_lrus(void)
{
	on_each_cpu_cond(has_bh_in_lru, invalidate_bh_lru, NULL, 1, GFP_KERNEL);
}
EXPORT_SYMBOL_GPL(invalidate_bh_lrus);

void set_bh_page(struct buffer_head *bh,
		struct page *page, unsigned long offset)
{
	bh->b_page = page;
	BUG_ON(offset >= PAGE_SIZE);
	if (PageHighMem(page))
		/*
		 * This catches illegal uses and preserves the offset:
		 */
		bh->b_data = (char *)(0 + offset);
	else
		bh->b_data = page_address(page) + offset;
}
EXPORT_SYMBOL(set_bh_page);

/*
 * Called when truncating a buffer on a page completely.
 */
static void discard_buffer(struct buffer_head *bh)
{
	lock_buffer(bh);
	clear_buffer_dirty(bh);
	bh->b_bdev = NULL;
	clear_buffer_mapped(bh);
	clear_buffer_req(bh);
	clear_buffer_new(bh);
	clear_buffer_delay(bh);
	clear_buffer_unwritten(bh);
	unlock_buffer(bh);
}

/**
 * block_invalidatepage - invalidate part or all of a buffer-backed page
 *
 * @page: the page which is affected
 * @offset: the index of the truncation point
 *
 * block_invalidatepage() is called when all or part of the page has become
 * invalidated by a truncate operation.
 *
 * block_invalidatepage() does not have to release all buffers, but it must
 * ensure that no dirty buffer is left outside @offset and that no I/O
 * is underway against any of the blocks which are outside the truncation
 * point.  Because the caller is about to free (and possibly reuse) those
 * blocks on-disk.
 */
void block_invalidatepage(struct page *page, unsigned long offset)
{
	struct buffer_head *head, *bh, *next;
	unsigned int curr_off = 0;

	BUG_ON(!PageLocked(page));
	if (!page_has_buffers(page))
		goto out;

	head = page_buffers(page);
	bh = head;
	do {
		unsigned int next_off = curr_off + bh->b_size;
		next = bh->b_this_page;

		/*
		 * is this block fully invalidated?
		 */
		if (offset <= curr_off)
			discard_buffer(bh);
		curr_off = next_off;
		bh = next;
	} while (bh != head);

	/*
	 * We release buffers only if the entire page is being invalidated.
	 * The get_block cached value has been unconditionally invalidated,
	 * so real IO is not possible anymore.
	 */
	if (offset == 0)
		try_to_release_page(page, 0);
out:
	return;
}
EXPORT_SYMBOL(block_invalidatepage);

/*
 * We attach and possibly dirty the buffers atomically wrt
 * __set_page_dirty_buffers() via private_lock.  try_to_free_buffers
 * is already excluded via the page lock.
 */
void create_empty_buffers(struct page *page,
			unsigned long blocksize, unsigned long b_state)
{
	struct buffer_head *bh, *head, *tail;

	head = alloc_page_buffers(page, blocksize, 1);
	bh = head;
	do {
		bh->b_state |= b_state;
		tail = bh;
		bh = bh->b_this_page;
	} while (bh);
	tail->b_this_page = head;

	spin_lock(&page->mapping->private_lock);
	if (PageUptodate(page) || PageDirty(page)) {
		bh = head;
		do {
			if (PageDirty(page))
				set_buffer_dirty(bh);
			if (PageUptodate(page))
				set_buffer_uptodate(bh);
			bh = bh->b_this_page;
		} while (bh != head);
	}
	attach_page_buffers(page, head);
	spin_unlock(&page->mapping->private_lock);
}
EXPORT_SYMBOL(create_empty_buffers);

/*
 * We are taking a block for data and we don't want any output from any
 * buffer-cache aliases starting from return from that function and
 * until the moment when something will explicitly mark the buffer
 * dirty (hopefully that will not happen until we will free that block ;-)
 * We don't even need to mark it not-uptodate - nobody can expect
 * anything from a newly allocated buffer anyway. We used to use
 * unmap_buffer() for such invalidation, but that was wrong. We definitely
 * don't want to mark the alias unmapped, for example - it would confuse
 * anyone who might pick it with bread() afterwards...
 *
 * Also..  Note that bforget() doesn't lock the buffer.  So there can
 * be writeout I/O going on against recently-freed buffers.  We don't
 * wait on that I/O in bforget() - it's more efficient to wait on the I/O
 * only if we really need to.  That happens here.
 */
void unmap_underlying_metadata(struct block_device *bdev, sector_t block)
{
	struct buffer_head *old_bh;

	might_sleep();

	old_bh = __find_get_block_slow(bdev, block);
	if (old_bh) {
		clear_buffer_dirty(old_bh);
		wait_on_buffer(old_bh);
		clear_buffer_req(old_bh);
		__brelse(old_bh);
	}
}
EXPORT_SYMBOL(unmap_underlying_metadata);

/*
 * Size is a power-of-two in the range 512..PAGE_SIZE,
 * and the case we care about most is PAGE_SIZE.
 *
 * So this *could* possibly be written with those
 * constraints in mind (relevant mostly if some
 * architecture has a slow bit-scan instruction)
 */
static inline int block_size_bits(unsigned int blocksize)
{
	return ilog2(blocksize);
}

static struct buffer_head *create_page_buffers(struct page *page, struct inode *inode, unsigned int b_state)
{
	BUG_ON(!PageLocked(page));

	if (!page_has_buffers(page))
		create_empty_buffers(page, 1 << ACCESS_ONCE(inode->i_blkbits), b_state);
	return page_buffers(page);
}

/*
 * NOTE! All mapped/uptodate combinations are valid:
 *
 *	Mapped	Uptodate	Meaning
 *
 *	No	No		"unknown" - must do get_block()
 *	No	Yes		"hole" - zero-filled
 *	Yes	No		"allocated" - allocated on disk, not read in
 *	Yes	Yes		"valid" - allocated and up-to-date in memory.
 *
 * "Dirty" is valid only with the last case (mapped+uptodate).
 */

/*
 * While block_write_full_page is writing back the dirty buffers under
 * the page lock, whoever dirtied the buffers may decide to clean them
 * again at any time.  We handle that by only looking at the buffer
 * dirty bits here.
 *
 * If block_write_full_page() is called for regular writeback
 * (wbc->sync_mode == WB_SYNC_NONE) then it will redirty a page which
 * has a locked buffer.  This only can happen if someone has written the
 * buffer directly, with submit_bh().  At the address_space level
 * PageWriteback prevents this contention from occurring.
 *
 * If block_write_full_page() is called with wbc->sync_mode ==
 * WB_SYNC_ALL, the writes are posted using WRITE_SYNC; this
 * causes the writes to be flagged as synchronous writes.
 */
static int __block_write_full_page(struct inode *inode, struct page *page,
			get_block_t *get_block, struct writeback_control *wbc,
			bh_end_io_t *handler)
{
	int err;
	sector_t block;
	sector_t last_block;
	struct buffer_head *bh, *head;
	unsigned int blocksize, bbits;
	int nr_underway = 0;
	int write_op = (wbc->sync_mode == WB_SYNC_ALL ?
			WRITE_SYNC : WRITE);

	head = create_page_buffers(page, inode,
					(1 << BH_Dirty)|(1 << BH_Uptodate));

	/*
	 * Be very careful.  We have no exclusion from __set_page_dirty_buffers
	 * here, and the (potentially unmapped) buffers may become dirty at
	 * any time.  If a buffer becomes dirty here after we've inspected it
	 * then we just miss that fact, and the page stays dirty.
	 *
	 * Buffers outside i_size may be dirtied by __set_page_dirty_buffers;
	 * handle that here by just cleaning them.
	 */

	bh = head;
	blocksize = bh->b_size;
	bbits = block_size_bits(blocksize);

	block = (sector_t)page->index << (PAGE_CACHE_SHIFT - bbits);
	last_block = (i_size_read(inode) - 1) >> bbits;

	/*
	 * Get all the dirty buffers mapped before we start writing them
	 * out.
	 */
	do {
		if (block > last_block) {
			/*
			 * mapped buffers outside i_size will occur, because
			 * this page can be outside i_size when there is a
			 * truncate in progress.
			 */
			clear_buffer_dirty(bh);
			set_buffer_uptodate(bh);
		} else if ((!buffer_mapped(bh) || buffer_delay(bh)) &&
			   buffer_dirty(bh)) {
			WARN_ON(bh->b_size != blocksize);
			err = get_block(inode, block, bh, 1);
			if (err)
				goto recover;
			clear_buffer_delay(bh);
			if (buffer_new(bh)) {
				/* blockdev mappings never come here */
				clear_buffer_new(bh);
				unmap_underlying_metadata(bh->b_bdev,
							bh->b_blocknr);
			}
		}
		bh = bh->b_this_page;
		block++;
	} while (bh != head);

	do {
		if (!buffer_mapped(bh))
			continue;
		/*
		 * If it's a fully non-blocking write attempt and we cannot
		 * lock the buffer then redirty the page.  Note that this can
		 * potentially cause a busy-wait loop from writeback threads
		 * and kswapd activity, but those code paths have their own
		 * higher-level throttling.
		 */
		if (wbc->sync_mode != WB_SYNC_NONE) {
			lock_buffer(bh);
		} else if (!trylock_buffer(bh)) {
			redirty_page_for_writepage(wbc, page);
			continue;
		}
		if (test_clear_buffer_dirty(bh)) {
			mark_buffer_async_write_endio(bh, handler);
		} else {
			unlock_buffer(bh);
		}
	} while ((bh = bh->b_this_page) != head);

	/*
	 * The page and its buffers are protected by PageWriteback(), so we can
	 * drop the bh refcounts early.
	 */
	BUG_ON(PageWriteback(page));
	set_page_writeback(page);

	do {
		struct buffer_head *next = bh->b_this_page;
		if (buffer_async_write(bh)) {
			submit_bh(write_op, bh);
			nr_underway++;
		}
		bh = next;
	} while (bh != head);
	unlock_page(page);

	err = 0;
done:
	if (nr_underway == 0) {
		/*
		 * The page was marked dirty, but the buffers were
		 * clean.  Someone wrote them back by hand with
		 * ll_rw_block/submit_bh.  A rare case.
		 */
		end_page_writeback(page);

		/*
		 * The page and buffer_heads can be released at any time from
		 * here on.
		 */
	}
	return err;

recover:
	/*
	 * ENOSPC, or some other error.  We may already have added some
	 * blocks to the file, so we need to write these out to avoid
	 * exposing stale data.
	 * The page is currently locked and not marked for writeback
	 */
	bh = head;
	/* Recovery: lock and submit the mapped buffers */
	do {
		if (buffer_mapped(bh) && buffer_dirty(bh) &&
		    !buffer_delay(bh)) {
			lock_buffer(bh);
			mark_buffer_async_write_endio(bh, handler);
		} else {
			/*
			 * The buffer may have been set dirty during
			 * attachment to a dirty page.
			 */
			clear_buffer_dirty(bh);
		}
	} while ((bh = bh->b_this_page) != head);
	SetPageError(page);
	BUG_ON(PageWriteback(page));
	mapping_set_error(page->mapping, err);
	set_page_writeback(page);
	do {
		struct buffer_head *next = bh->b_this_page;
		if (buffer_async_write(bh)) {
			clear_buffer_dirty(bh);
			submit_bh(write_op, bh);
			nr_underway++;
		}
		bh = next;
	} while (bh != head);
	unlock_page(page);
	goto done;
}

/*
 * If a page has any new buffers, zero them out here, and mark them uptodate
 * and dirty so they'll be written out (in order to prevent uninitialised
 * block data from leaking). And clear the new bit.
 */
void page_zero_new_buffers(struct page *page, unsigned from, unsigned to)
{
	unsigned int block_start, block_end;
	struct buffer_head *head, *bh;

	BUG_ON(!PageLocked(page));
	if (!page_has_buffers(page))
		return;

	bh = head = page_buffers(page);
	block_start = 0;
	do {
		block_end = block_start + bh->b_size;

		if (buffer_new(bh)) {
			if (block_end > from && block_start < to) {
				if (!PageUptodate(page)) {
					unsigned start, size;

					start = max(from, block_start);
					size = min(to, block_end) - start;

					zero_user(page, start, size);
					set_buffer_uptodate(bh);
				}

				clear_buffer_new(bh);
				mark_buffer_dirty(bh);
			}
		}

		block_start = block_end;
		bh = bh->b_this_page;
	} while (bh != head);
}
EXPORT_SYMBOL(page_zero_new_buffers);

int __block_write_begin(struct page *page, loff_t pos, unsigned len,
		get_block_t *get_block)
{
	unsigned from = pos & (PAGE_CACHE_SIZE - 1);
	unsigned to = from + len;
	struct inode *inode = page->mapping->host;
	unsigned block_start, block_end;
	sector_t block;
	int err = 0;
	unsigned blocksize, bbits;
	struct buffer_head *bh, *head, *wait[2], **wait_bh = wait;

	BUG_ON(!PageLocked(page));
	BUG_ON(from > PAGE_CACHE_SIZE);
	BUG_ON(to > PAGE_CACHE_SIZE);
	BUG_ON(from > to);

	head = create_page_buffers(page, inode, 0);
	blocksize = head->b_size;
	bbits = block_size_bits(blocksize);

	block = (sector_t)page->index << (PAGE_CACHE_SHIFT - bbits);

	for (bh = head, block_start = 0; bh != head || !block_start;
	    block++, block_start = block_end, bh = bh->b_this_page) {
		block_end = block_start + blocksize;
		if (block_end <= from || block_start >= to) {
			if (PageUptodate(page)) {
				if (!buffer_uptodate(bh))
					set_buffer_uptodate(bh);
			}
			continue;
		}
		if (buffer_new(bh))
			clear_buffer_new(bh);
		if (!buffer_mapped(bh)) {
			WARN_ON(bh->b_size != blocksize);
			err = get_block(inode, block, bh, 1);
			if (err)
				break;
			if (buffer_new(bh)) {
				unmap_underlying_metadata(bh->b_bdev,
							bh->b_blocknr);
				if (PageUptodate(page)) {
					clear_buffer_new(bh);
					set_buffer_uptodate(bh);
					mark_buffer_dirty(bh);
					continue;
				}
				if (block_end > to || block_start < from)
					zero_user_segments(page,
						to, block_end,
						block_start, from);
				continue;
			}
		}
		if (PageUptodate(page)) {
			if (!buffer_uptodate(bh))
				set_buffer_uptodate(bh);
			continue;
		}
		if (!buffer_uptodate(bh) && !buffer_delay(bh) &&
		    !buffer_unwritten(bh) &&
		     (block_start < from || block_end > to)) {
			ll_rw_block(READ, 1, &bh);
			*wait_bh++ = bh;
		}
	}
	/*
	 * If we issued read requests - let them complete.
	 */
	while (wait_bh > wait) {
		wait_on_buffer(*--wait_bh);
		if (!buffer_uptodate(*wait_bh))
			err = -EIO;
	}
	if (unlikely(err))
		page_zero_new_buffers(page, from, to);
	return err;
}
EXPORT_SYMBOL(__block_write_begin);

static int __block_commit_write(struct inode *inode, struct page *page,
		unsigned from, unsigned to)
{
	unsigned block_start, block_end;
	int partial = 0;
	unsigned blocksize;
	struct buffer_head *bh, *head;

	bh = head = page_buffers(page);
	blocksize = bh->b_size;

	block_start = 0;
	do {
		block_end = block_start + blocksize;
		if (block_end <= from || block_start >= to) {
			if (!buffer_uptodate(bh))
				partial = 1;
		} else {
			set_buffer_uptodate(bh);
			mark_buffer_dirty(bh);
		}
		clear_buffer_new(bh);

		block_start = block_end;
		bh = bh->b_this_page;
	} while (bh != head);

	/*
	 * If this is a partial write which happened to make all buffers
	 * uptodate then we can optimize away a bogus readpage() for
	 * the next read(). Here we 'discover' whether the page went
	 * uptodate as a result of this (potentially partial) write.
	 */
	if (!partial)
		SetPageUptodate(page);
	return 0;
}

/*
 * block_write_begin takes care of the basic task of block allocation and
 * bringing partial write blocks uptodate first.
 *
 * The filesystem needs to handle block truncation upon failure.
 */
int block_write_begin(struct address_space *mapping, loff_t pos, unsigned len,
		unsigned flags, struct page **pagep, get_block_t *get_block)
{
	pgoff_t index = pos >> PAGE_CACHE_SHIFT;
	struct page *page;
	int status;

	page = grab_cache_page_write_begin(mapping, index, flags);
	if (!page)
		return -ENOMEM;

	status = __block_write_begin(page, pos, len, get_block);
	if (unlikely(status)) {
		unlock_page(page);
		page_cache_release(page);
		page = NULL;
	}

	*pagep = page;
	return status;
}
EXPORT_SYMBOL(block_write_begin);

int block_write_end(struct file *file, struct address_space *mapping,
			loff_t pos, unsigned len, unsigned copied,
			struct page *page, void *fsdata)
{
	struct inode *inode = mapping->host;
	unsigned start;

	start = pos & (PAGE_CACHE_SIZE - 1);

	if (unlikely(copied < len)) {
		/*
		 * The buffers that were written will now be uptodate, so we
		 * don't have to worry about a readpage reading them and
		 * overwriting a partial write. However if we have encountered
		 * a short write and only partially written into a buffer, it
		 * will not be marked uptodate, so a readpage might come in and
		 * destroy our partial write.
		 *
		 * Do the simplest thing, and just treat any short write to a
		 * non uptodate page as a zero-length write, and force the
		 * caller to redo the whole thing.
		 */
		if (!PageUptodate(page))
			copied = 0;

		page_zero_new_buffers(page, start+copied, start+len);
	}
	flush_dcache_page(page);

	/* This could be a short (even 0-length) commit */
	__block_commit_write(inode, page, start, start+copied);

	return copied;
}
EXPORT_SYMBOL(block_write_end);

int generic_write_end(struct file *file, struct address_space *mapping,
			loff_t pos, unsigned len, unsigned copied,
			struct page *page, void *fsdata)
{
	struct inode *inode = mapping->host;
	int i_size_changed = 0;

	copied = block_write_end(file, mapping, pos, len, copied, page, fsdata);

	/*
	 * No need to use i_size_read() here, the i_size
	 * cannot change under us because we hold i_mutex.
	 *
	 * But it's important to update i_size while still holding page lock:
	 * page writeout could otherwise come in and zero beyond i_size.
	 */
	if (pos+copied > inode->i_size) {
		i_size_write(inode, pos+copied);
		i_size_changed = 1;
	}

	unlock_page(page);
	page_cache_release(page);

	/*
	 * Don't mark the inode dirty under page lock. First, it unnecessarily
	 * makes the holding time of page lock longer. Second, it forces lock
	 * ordering of page lock and transaction start for journaling
	 * filesystems.
	 */
	if (i_size_changed)
		mark_inode_dirty(inode);

	return copied;
}
EXPORT_SYMBOL(generic_write_end);
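
/*
 * Illustrative sketch (not part of the original file): the usual way a
 * filesystem wires block_write_begin()/generic_write_end() into its
 * address_space_operations, given a get_block_t of its own.  All
 * "myfs_*" names are hypothetical; ext2 and similar filesystems follow
 * this pattern.
 *
 *	static int myfs_write_begin(struct file *file,
 *			struct address_space *mapping, loff_t pos,
 *			unsigned len, unsigned flags,
 *			struct page **pagep, void **fsdata)
 *	{
 *		return block_write_begin(mapping, pos, len, flags, pagep,
 *					 myfs_get_block);
 *	}
 *
 *	static const struct address_space_operations myfs_aops = {
 *		.write_begin	= myfs_write_begin,
 *		.write_end	= generic_write_end,
 *	};
 */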

/*
 * block_is_partially_uptodate checks whether buffers within a page are
 * uptodate or not.
 *
 * Returns true if all buffers which correspond to a file portion
 * we want to read are uptodate.
 */
int block_is_partially_uptodate(struct page *page, read_descriptor_t *desc,
					unsigned long from)
{
	unsigned block_start, block_end, blocksize;
	unsigned to;
	struct buffer_head *bh, *head;
	int ret = 1;

	if (!page_has_buffers(page))
		return 0;

	head = page_buffers(page);
	blocksize = head->b_size;
	to = min_t(unsigned, PAGE_CACHE_SIZE - from, desc->count);
	to = from + to;
	if (from < blocksize && to > PAGE_CACHE_SIZE - blocksize)
		return 0;

	bh = head;
	block_start = 0;
	do {
		block_end = block_start + blocksize;
		if (block_end > from && block_start < to) {
			if (!buffer_uptodate(bh)) {
				ret = 0;
				break;
			}
			if (block_end >= to)
				break;
		}
		block_start = block_end;
		bh = bh->b_this_page;
	} while (bh != head);

	return ret;
}
EXPORT_SYMBOL(block_is_partially_uptodate);

/*
 * Generic "read page" function for block devices that have the normal
 * get_block functionality. This is most of the block device filesystems.
 * Reads the page asynchronously --- the unlock_buffer() and
 * set/clear_buffer_uptodate() functions propagate buffer state into the
 * page struct once IO has completed.
 */
int block_read_full_page(struct page *page, get_block_t *get_block)
{
	struct inode *inode = page->mapping->host;
	sector_t iblock, lblock;
	struct buffer_head *bh, *head, *arr[MAX_BUF_PER_PAGE];
	unsigned int blocksize, bbits;
	int nr, i;
	int fully_mapped = 1;

	head = create_page_buffers(page, inode, 0);
	blocksize = head->b_size;
	bbits = block_size_bits(blocksize);

	iblock = (sector_t)page->index << (PAGE_CACHE_SHIFT - bbits);
	lblock = (i_size_read(inode)+blocksize-1) >> bbits;
	bh = head;
	nr = 0;
	i = 0;

	do {
		if (buffer_uptodate(bh))
			continue;

		if (!buffer_mapped(bh)) {
			int err = 0;

			fully_mapped = 0;
			if (iblock < lblock) {
				WARN_ON(bh->b_size != blocksize);
				err = get_block(inode, iblock, bh, 0);
				if (err)
					SetPageError(page);
			}
			if (!buffer_mapped(bh)) {
				zero_user(page, i * blocksize, blocksize);
				if (!err)
					set_buffer_uptodate(bh);
				continue;
			}
			/*
			 * get_block() might have updated the buffer
			 * synchronously
			 */
			if (buffer_uptodate(bh))
				continue;
		}
		arr[nr++] = bh;
	} while (i++, iblock++, (bh = bh->b_this_page) != head);

	if (fully_mapped)
		SetPageMappedToDisk(page);

	if (!nr) {
		/*
		 * All buffers are uptodate - we can set the page uptodate
		 * as well. But not if get_block() returned an error.
		 */
		if (!PageError(page))
			SetPageUptodate(page);
		unlock_page(page);
		return 0;
	}

	/* Stage two: lock the buffers */
	for (i = 0; i < nr; i++) {
		bh = arr[i];
		lock_buffer(bh);
		mark_buffer_async_read(bh);
	}

	/*
	 * Stage 3: start the IO.  Check for uptodateness
	 * inside the buffer lock in case another process reading
	 * the underlying blockdev brought it uptodate (the sct fix).
	 */
	for (i = 0; i < nr; i++) {
		bh = arr[i];
		if (buffer_uptodate(bh))
			end_buffer_async_read(bh, 1);
		else
			submit_bh(READ, bh);
	}
	return 0;
}
EXPORT_SYMBOL(block_read_full_page);
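
/*
 * Illustrative sketch (not part of the original file): ->readpage for a
 * buffer-backed filesystem is typically a one-liner around
 * block_read_full_page().  "myfs_get_block" is hypothetical.
 *
 *	static int myfs_readpage(struct file *file, struct page *page)
 *	{
 *		return block_read_full_page(page, myfs_get_block);
 *	}
 */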

/*
 * utility function for filesystems that need to do work on expanding
 * truncates.  Uses filesystem pagecache writes to allow the filesystem to
 * deal with the hole.
 */
int generic_cont_expand_simple(struct inode *inode, loff_t size)
{
	struct address_space *mapping = inode->i_mapping;
	struct page *page;
	void *fsdata;
	int err;

	err = inode_newsize_ok(inode, size);
	if (err)
		goto out;

	err = pagecache_write_begin(NULL, mapping, size, 0,
				AOP_FLAG_UNINTERRUPTIBLE|AOP_FLAG_CONT_EXPAND,
				&page, &fsdata);
	if (err)
		goto out;

	err = pagecache_write_end(NULL, mapping, size, 0, 0, page, fsdata);
	BUG_ON(err > 0);

out:
	return err;
}
EXPORT_SYMBOL(generic_cont_expand_simple);

static int cont_expand_zero(struct file *file, struct address_space *mapping,
			    loff_t pos, loff_t *bytes)
{
	struct inode *inode = mapping->host;
	unsigned blocksize = 1 << inode->i_blkbits;
	struct page *page;
	void *fsdata;
	pgoff_t index, curidx;
	loff_t curpos;
	unsigned zerofrom, offset, len;
	int err = 0;

	index = pos >> PAGE_CACHE_SHIFT;
	offset = pos & ~PAGE_CACHE_MASK;

	while (index > (curidx = (curpos = *bytes)>>PAGE_CACHE_SHIFT)) {
		zerofrom = curpos & ~PAGE_CACHE_MASK;
		if (zerofrom & (blocksize-1)) {
			*bytes |= (blocksize-1);
			(*bytes)++;
		}
		len = PAGE_CACHE_SIZE - zerofrom;

		err = pagecache_write_begin(file, mapping, curpos, len,
						AOP_FLAG_UNINTERRUPTIBLE,
						&page, &fsdata);
		if (err)
			goto out;
		zero_user(page, zerofrom, len);
		err = pagecache_write_end(file, mapping, curpos, len, len,
						page, fsdata);
		if (err < 0)
			goto out;
		BUG_ON(err != len);
		err = 0;

		balance_dirty_pages_ratelimited(mapping);
	}

	/* page covers the boundary, find the boundary offset */
	if (index == curidx) {
		zerofrom = curpos & ~PAGE_CACHE_MASK;
		/* if we will expand the thing last block will be filled */
		if (offset <= zerofrom) {
			goto out;
		}
		if (zerofrom & (blocksize-1)) {
			*bytes |= (blocksize-1);
			(*bytes)++;
		}
		len = offset - zerofrom;

		err = pagecache_write_begin(file, mapping, curpos, len,
						AOP_FLAG_UNINTERRUPTIBLE,
						&page, &fsdata);
		if (err)
			goto out;
		zero_user(page, zerofrom, len);
		err = pagecache_write_end(file, mapping, curpos, len, len,
						page, fsdata);
		if (err < 0)
			goto out;
		BUG_ON(err != len);
		err = 0;
	}
out:
	return err;
}

/*
 * For moronic filesystems that do not allow holes in file.
 * We may have to extend the file.
 */
int cont_write_begin(struct file *file, struct address_space *mapping,
			loff_t pos, unsigned len, unsigned flags,
			struct page **pagep, void **fsdata,
			get_block_t *get_block, loff_t *bytes)
{
	struct inode *inode = mapping->host;
	unsigned blocksize = 1 << inode->i_blkbits;
	unsigned zerofrom;
	int err;

	err = cont_expand_zero(file, mapping, pos, bytes);
	if (err)
		return err;

	zerofrom = *bytes & ~PAGE_CACHE_MASK;
	if (pos+len > *bytes && zerofrom & (blocksize-1)) {
		*bytes |= (blocksize-1);
		(*bytes)++;
	}

	return block_write_begin(mapping, pos, len, flags, pagep, get_block);
}
EXPORT_SYMBOL(cont_write_begin);

int block_commit_write(struct page *page, unsigned from, unsigned to)
{
	struct inode *inode = page->mapping->host;
	__block_commit_write(inode, page, from, to);
	return 0;
}
EXPORT_SYMBOL(block_commit_write);

/*
 * block_page_mkwrite() is not allowed to change the file size as it gets
 * called from a page fault handler when a page is first dirtied. Hence we must
 * be careful to check for EOF conditions here. We set the page up correctly
 * for a written page which means we get ENOSPC checking when writing into
 * holes and correct delalloc and unwritten extent mapping on filesystems that
 * support these features.
 *
 * We are not allowed to take the i_mutex here so we have to play games to
 * protect against truncate races as the page could now be beyond EOF.  Because
 * truncate writes the inode size before removing pages, once we have the
 * page lock we can determine safely if the page is beyond EOF. If it is not
 * beyond EOF, then the page is guaranteed safe against truncation until we
 * unlock the page.
 *
 * Direct callers of this function should protect against filesystem freezing
 * using sb_start_write() - sb_end_write() functions.
 */
int __block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
			 get_block_t get_block)
{
	struct page *page = vmf->page;
	struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
	unsigned long end;
	loff_t size;
	int ret;

	lock_page(page);
	size = i_size_read(inode);
	if ((page->mapping != inode->i_mapping) ||
	    (page_offset(page) > size)) {
		/* We overload EFAULT to mean page got truncated */
		ret = -EFAULT;
		goto out_unlock;
	}

	/* page is wholly or partially inside EOF */
	if (((page->index + 1) << PAGE_CACHE_SHIFT) > size)
		end = size & ~PAGE_CACHE_MASK;
	else
		end = PAGE_CACHE_SIZE;

	ret = __block_write_begin(page, 0, end, get_block);
	if (!ret)
		ret = block_commit_write(page, 0, end);

	if (unlikely(ret < 0))
		goto out_unlock;
	set_page_dirty(page);
	wait_on_page_writeback(page);
	return 0;
out_unlock:
	unlock_page(page);
	return ret;
}
EXPORT_SYMBOL(__block_page_mkwrite);

int block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
		       get_block_t get_block)
{
	int ret;
	struct super_block *sb = vma->vm_file->f_path.dentry->d_inode->i_sb;

	sb_start_pagefault(sb);

	/*
	 * Update file times before taking page lock. We may end up failing the
	 * fault so this update may be superfluous but who really cares...
	 */
	file_update_time(vma->vm_file);

	ret = __block_page_mkwrite(vma, vmf, get_block);
	sb_end_pagefault(sb);
	return block_page_mkwrite_return(ret);
}
EXPORT_SYMBOL(block_page_mkwrite);
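
/*
 * Illustrative sketch (not part of the original file): hooking
 * block_page_mkwrite() up as a ->page_mkwrite handler.  The "myfs_*"
 * names are hypothetical.
 *
 *	static int myfs_page_mkwrite(struct vm_area_struct *vma,
 *				     struct vm_fault *vmf)
 *	{
 *		return block_page_mkwrite(vma, vmf, myfs_get_block);
 *	}
 *
 *	static const struct vm_operations_struct myfs_vm_ops = {
 *		.fault		= filemap_fault,
 *		.page_mkwrite	= myfs_page_mkwrite,
 *	};
 */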

/*
 * nobh_write_begin()'s prereads are special: the buffer_heads are freed
 * immediately, while under the page lock.  So it needs a special end_io
 * handler which does not touch the bh after unlocking it.
 */
static void end_buffer_read_nobh(struct buffer_head *bh, int uptodate)
{
	__end_buffer_read_notouch(bh, uptodate);
}

/*
 * Attach the singly-linked list of buffers created by nobh_write_begin, to
 * the page (converting it to circular linked list and taking care of page
 * dirty races).
 */
static void attach_nobh_buffers(struct page *page, struct buffer_head *head)
{
	struct buffer_head *bh;

	BUG_ON(!PageLocked(page));

	spin_lock(&page->mapping->private_lock);
	bh = head;
	do {
		if (PageDirty(page))
			set_buffer_dirty(bh);
		if (!bh->b_this_page)
			bh->b_this_page = head;
		bh = bh->b_this_page;
	} while (bh != head);
	attach_page_buffers(page, head);
	spin_unlock(&page->mapping->private_lock);
}

/*
 * On entry, the page is fully not uptodate.
 * On exit the page is fully uptodate in the areas outside (from,to)
 * The filesystem needs to handle block truncation upon failure.
 */
int nobh_write_begin(struct address_space *mapping,
			loff_t pos, unsigned len, unsigned flags,
			struct page **pagep, void **fsdata,
			get_block_t *get_block)
{
	struct inode *inode = mapping->host;
	const unsigned blkbits = inode->i_blkbits;
	const unsigned blocksize = 1 << blkbits;
	struct buffer_head *head, *bh;
	struct page *page;
	pgoff_t index;
	unsigned from, to;
	unsigned block_in_page;
	unsigned block_start, block_end;
	sector_t block_in_file;
	int nr_reads = 0;
	int ret = 0;
	int is_mapped_to_disk = 1;

	index = pos >> PAGE_CACHE_SHIFT;
	from = pos & (PAGE_CACHE_SIZE - 1);
	to = from + len;

	page = grab_cache_page_write_begin(mapping, index, flags);
	if (!page)
		return -ENOMEM;
	*pagep = page;
	*fsdata = NULL;

	if (page_has_buffers(page)) {
		ret = __block_write_begin(page, pos, len, get_block);
		if (unlikely(ret))
			goto out_release;
		return ret;
	}

	if (PageMappedToDisk(page))
		return 0;

	/*
	 * Allocate buffers so that we can keep track of state, and potentially
	 * attach them to the page if an error occurs.  In the common case of
	 * no error they are freed again before we return; on error they are
	 * attached so that error handling during writeout stays sane.
	 */
	head = alloc_page_buffers(page, blocksize, 0);
	if (!head) {
		ret = -ENOMEM;
		goto out_release;
	}

	block_in_file = (sector_t)page->index << (PAGE_CACHE_SHIFT - blkbits);

	/*
	 * We loop across all blocks in the page, whether or not they are
	 * part of the affected region.  This is so we can discover if the
	 * page is fully mapped-to-disk.
	 */
	for (block_start = 0, block_in_page = 0, bh = head;
		  block_start < PAGE_CACHE_SIZE;
		  block_in_page++, block_start += blocksize, bh = bh->b_this_page) {
		int create;

		block_end = block_start + blocksize;
		bh->b_state = 0;
		create = 1;
		if (block_start >= to)
			create = 0;
		ret = get_block(inode, block_in_file + block_in_page,
					bh, create);
		if (ret)
			goto failed;
		if (!buffer_mapped(bh))
			is_mapped_to_disk = 0;
		if (buffer_new(bh))
			unmap_underlying_metadata(bh->b_bdev, bh->b_blocknr);
		if (PageUptodate(page)) {
			set_buffer_uptodate(bh);
			continue;
		}
		if (buffer_new(bh) || !buffer_mapped(bh)) {
			zero_user_segments(page, block_start, from,
							to, block_end);
			continue;
		}
		if (buffer_uptodate(bh))
			continue;
		if (block_start < from || block_end > to) {
			lock_buffer(bh);
			bh->b_end_io = end_buffer_read_nobh;
			submit_bh(READ, bh);
			nr_reads++;
		}
	}

	if (nr_reads) {
		/*
		 * The page is locked, so these buffers are protected from
		 * any VM or truncate activity.  Hence we don't need to care
		 * for the buffer_head refcounts.
		 */
		for (bh = head; bh; bh = bh->b_this_page) {
			wait_on_buffer(bh);
			if (!buffer_uptodate(bh))
				ret = -EIO;
		}
		if (ret)
			goto failed;
	}

	if (is_mapped_to_disk)
		SetPageMappedToDisk(page);

	*fsdata = head; /* to be released by nobh_write_end */

	return 0;

failed:
	BUG_ON(!ret);
	/*
	 * Error recovery is a bit difficult. We need to zero out blocks that
	 * were newly allocated, and dirty them to ensure they get written out.
	 * Buffers need to be attached to the page at this point, otherwise
	 * the handling of potential IO errors during writeout would be hard
	 * (could try doing synchronous writeout, but what if that fails too?)
	 */
	attach_nobh_buffers(page, head);
	page_zero_new_buffers(page, from, to);

out_release:
	unlock_page(page);
	page_cache_release(page);
	*pagep = NULL;

	return ret;
}
EXPORT_SYMBOL(nobh_write_begin);

int nobh_write_end(struct file *file, struct address_space *mapping,
			loff_t pos, unsigned len, unsigned copied,
			struct page *page, void *fsdata)
{
	struct inode *inode = page->mapping->host;
	struct buffer_head *head = fsdata;
	struct buffer_head *bh;
	BUG_ON(fsdata != NULL && page_has_buffers(page));

	if (unlikely(copied < len) && head)
		attach_nobh_buffers(page, head);
	if (page_has_buffers(page))
		return generic_write_end(file, mapping, pos, len,
					copied, page, fsdata);

	SetPageUptodate(page);
	set_page_dirty(page);
	if (pos+copied > inode->i_size) {
		i_size_write(inode, pos+copied);
		mark_inode_dirty(inode);
	}

	unlock_page(page);
	page_cache_release(page);

	while (head) {
		bh = head;
		head = head->b_this_page;
		free_buffer_head(bh);
	}

	return copied;
}
EXPORT_SYMBOL(nobh_write_end);

/*
 * nobh_writepage() - based on block_full_write_page() except
 * that it tries to operate without attaching bufferheads to
 * the page.
 */
int nobh_writepage(struct page *page, get_block_t *get_block,
			struct writeback_control *wbc)
{
	struct inode * const inode = page->mapping->host;
	loff_t i_size = i_size_read(inode);
	const pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT;
	unsigned offset;
	int ret;

	/* Is the page fully inside i_size? */
	if (page->index < end_index)
		goto out;

	/* Is the page fully outside i_size? (truncate in progress) */
	offset = i_size & (PAGE_CACHE_SIZE-1);
	if (page->index >= end_index+1 || !offset) {
		/*
		 * The page may have dirty, unmapped buffers.  Make them
		 * freeable here, so the page does not leak.
		 */
#if 0
		/* Not really sure about this  - do we need this ? */
		if (page->mapping->a_ops->invalidatepage)
			page->mapping->a_ops->invalidatepage(page, offset);
#endif
		unlock_page(page);
		return 0; /* don't care */
	}

	/*
	 * The page straddles i_size.  It must be zeroed out on each and every
	 * writepage invocation because it may be mmapped.  "A file is mapped
	 * in multiples of the page size.  For a file that is not a multiple of
	 * the page size, the remaining memory is zeroed when mapped, and
	 * writes to that region are not written out to the file."
	 */
	zero_user_segment(page, offset, PAGE_CACHE_SIZE);
out:
	ret = mpage_writepage(page, get_block, wbc);
	if (ret == -EAGAIN)
		ret = __block_write_full_page(inode, page, get_block, wbc,
					      end_buffer_async_write);
	return ret;
}
EXPORT_SYMBOL(nobh_writepage);

int nobh_truncate_page(struct address_space *mapping,
			loff_t from, get_block_t *get_block)
{
	pgoff_t index = from >> PAGE_CACHE_SHIFT;
	unsigned offset = from & (PAGE_CACHE_SIZE-1);
	unsigned blocksize;
	sector_t iblock;
	unsigned length, pos;
	struct inode *inode = mapping->host;
	struct page *page;
	struct buffer_head map_bh;
	int err;

	blocksize = 1 << inode->i_blkbits;
	length = offset & (blocksize - 1);

	/* Block boundary? Nothing to do */
	if (!length)
		return 0;

	length = blocksize - length;
	iblock = (sector_t)index << (PAGE_CACHE_SHIFT - inode->i_blkbits);

	page = grab_cache_page(mapping, index);
	err = -ENOMEM;
	if (!page)
		goto out;

	if (page_has_buffers(page)) {
has_buffers:
		unlock_page(page);
		page_cache_release(page);
		return block_truncate_page(mapping, from, get_block);
	}

	/* Find the buffer that contains "offset" */
	pos = blocksize;
	while (offset >= pos) {
		iblock++;
		pos += blocksize;
	}

	map_bh.b_size = blocksize;
	map_bh.b_state = 0;
	err = get_block(inode, iblock, &map_bh, 0);
	if (err)
		goto unlock;
	/* unmapped? It's a hole - nothing to do */
	if (!buffer_mapped(&map_bh))
		goto unlock;

	/* Ok, it's mapped. Make sure it's up-to-date */
	if (!PageUptodate(page)) {
		err = mapping->a_ops->readpage(NULL, page);
		if (err) {
			page_cache_release(page);
			goto out;
		}
		lock_page(page);
		if (!PageUptodate(page)) {
			err = -EIO;
			goto unlock;
		}
		if (page_has_buffers(page))
			goto has_buffers;
	}
	zero_user(page, offset, length);
	set_page_dirty(page);
	err = 0;

unlock:
	unlock_page(page);
	page_cache_release(page);
out:
	return err;
}
EXPORT_SYMBOL(nobh_truncate_page);

int block_truncate_page(struct address_space *mapping,
			loff_t from, get_block_t *get_block)
{
	pgoff_t index = from >> PAGE_CACHE_SHIFT;
	unsigned offset = from & (PAGE_CACHE_SIZE-1);
	unsigned blocksize;
	sector_t iblock;
	unsigned length, pos;
	struct inode *inode = mapping->host;
	struct page *page;
	struct buffer_head *bh;
	int err;

	blocksize = 1 << inode->i_blkbits;
	length = offset & (blocksize - 1);

	/* Block boundary? Nothing to do */
	if (!length)
		return 0;

	length = blocksize - length;
	iblock = (sector_t)index << (PAGE_CACHE_SHIFT - inode->i_blkbits);

	page = grab_cache_page(mapping, index);
	err = -ENOMEM;
	if (!page)
		goto out;

	if (!page_has_buffers(page))
		create_empty_buffers(page, blocksize, 0);

	/* Find the buffer that contains "offset" */
	bh = page_buffers(page);
	pos = blocksize;
	while (offset >= pos) {
		bh = bh->b_this_page;
		iblock++;
		pos += blocksize;
	}

	err = 0;
	if (!buffer_mapped(bh)) {
		WARN_ON(bh->b_size != blocksize);
		err = get_block(inode, iblock, bh, 0);
		if (err)
			goto unlock;
		/* unmapped? It's a hole - nothing to do */
		if (!buffer_mapped(bh))
			goto unlock;
	}

	/* Ok, it's mapped. Make sure it's up-to-date */
	if (PageUptodate(page))
		set_buffer_uptodate(bh);

	if (!buffer_uptodate(bh) && !buffer_delay(bh) && !buffer_unwritten(bh)) {
		err = -EIO;
		ll_rw_block(READ, 1, &bh);
		wait_on_buffer(bh);
		/* Uhhuh. Read error. Complain and punt. */
		if (!buffer_uptodate(bh))
			goto unlock;
	}

	zero_user(page, offset, length);
	mark_buffer_dirty(bh);
	err = 0;

unlock:
	unlock_page(page);
	page_cache_release(page);
out:
	return err;
}
EXPORT_SYMBOL(block_truncate_page);
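
/*
 * Illustrative sketch (not part of the original file): a shrinking
 * truncate zeroing the tail of the last partial block before trimming
 * blocks.  "myfs_setsize" and "myfs_get_block" are hypothetical names;
 * ext2's truncate path does the equivalent.
 *
 *	static int myfs_setsize(struct inode *inode, loff_t newsize)
 *	{
 *		int err;
 *
 *		err = block_truncate_page(inode->i_mapping, newsize,
 *					  myfs_get_block);
 *		if (err)
 *			return err;
 *		truncate_setsize(inode, newsize);
 *		// ... free the now-unused on-disk blocks ...
 *		return 0;
 *	}
 */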

/*
 * The generic ->writepage function for buffer-backed address_spaces
 */
int block_write_full_page_endio(struct page *page, get_block_t *get_block,
			struct writeback_control *wbc, bh_end_io_t *handler)
{
	struct inode * const inode = page->mapping->host;
	loff_t i_size = i_size_read(inode);
	const pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT;
	unsigned offset;

	/* Is the page fully inside i_size? */
	if (page->index < end_index)
		return __block_write_full_page(inode, page, get_block, wbc,
					       handler);

	/* Is the page fully outside i_size? (truncate in progress) */
	offset = i_size & (PAGE_CACHE_SIZE-1);
	if (page->index >= end_index+1 || !offset) {
		/*
		 * The page may have dirty, unmapped buffers.  Make them
		 * freeable here, so the page does not leak.
		 */
		do_invalidatepage(page, 0);
		unlock_page(page);
		return 0; /* don't care */
	}

	/*
	 * The page straddles i_size.  It must be zeroed out on each and every
	 * writepage invocation because it may be mmapped.  "A file is mapped
	 * in multiples of the page size.  For a file that is not a multiple of
	 * the page size, the remaining memory is zeroed when mapped, and
	 * writes to that region are not written out to the file."
	 */
	zero_user_segment(page, offset, PAGE_CACHE_SIZE);
	return __block_write_full_page(inode, page, get_block, wbc, handler);
}
EXPORT_SYMBOL(block_write_full_page_endio);

/*
 * The generic block ->writepage function, using the default
 * end_buffer_async_write completion handler.
 */
int block_write_full_page(struct page *page, get_block_t *get_block,
			struct writeback_control *wbc)
{
	return block_write_full_page_endio(page, get_block, wbc,
					   end_buffer_async_write);
}
EXPORT_SYMBOL(block_write_full_page);
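
/*
 * Illustrative sketch (not part of the original file): ->writepage built
 * on block_write_full_page().  "myfs_get_block" is hypothetical.
 *
 *	static int myfs_writepage(struct page *page,
 *				  struct writeback_control *wbc)
 *	{
 *		return block_write_full_page(page, myfs_get_block, wbc);
 *	}
 */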

sector_t generic_block_bmap(struct address_space *mapping, sector_t block,
			    get_block_t *get_block)
{
	struct buffer_head tmp;
	struct inode *inode = mapping->host;
	tmp.b_state = 0;
	tmp.b_blocknr = 0;
	tmp.b_size = 1 << inode->i_blkbits;
	get_block(inode, block, &tmp, 0);
	return tmp.b_blocknr;
}
EXPORT_SYMBOL(generic_block_bmap);
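
/*
 * Illustrative sketch (not part of the original file): the conventional
 * ->bmap implementation on top of generic_block_bmap().  "myfs_get_block"
 * is hypothetical.
 *
 *	static sector_t myfs_bmap(struct address_space *mapping,
 *				  sector_t block)
 *	{
 *		return generic_block_bmap(mapping, block, myfs_get_block);
 *	}
 */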

static void end_bio_bh_io_sync(struct bio *bio, int err)
{
	struct buffer_head *bh = bio->bi_private;

	if (err == -EOPNOTSUPP) {
		set_bit(BIO_EOPNOTSUPP, &bio->bi_flags);
	}

	if (unlikely(test_bit(BIO_QUIET, &bio->bi_flags)))
		set_bit(BH_Quiet, &bh->b_state);

	bh->b_end_io(bh, test_bit(BIO_UPTODATE, &bio->bi_flags));
	bio_put(bio);
}

/*
 * This allows us to do IO even on the odd last sectors of a device,
 * even if the block size is some multiple of the physical sector size.
 *
 * We'll just truncate the bio to the size of the device, and clear the
 * end of the last page as appropriate.
 */
static void guard_bh_eod(int rw, struct bio *bio, struct buffer_head *bh)
{
	sector_t maxsector;
	unsigned bytes;

	maxsector = i_size_read(bio->bi_bdev->bd_inode) >> 9;
	if (!maxsector)
		return;

	/*
	 * If the *whole* IO is past the end of the device,
	 * let it through, and the IO layer will turn it into
	 * an EIO.
	 */
	if (unlikely(bio->bi_sector >= maxsector))
		return;

	maxsector -= bio->bi_sector;
	bytes = bio->bi_size;
	if (likely((bytes >> 9) <= maxsector))
		return;

	/* Uhhuh. We've got a bio that straddles the device size! */
	bytes = maxsector << 9;

	/* Truncate the bio.. */
	bio->bi_size = bytes;
	bio->bi_io_vec[0].bv_len = bytes;

	/* ..and clear the end of the buffer for reads */
	if ((rw & RW_MASK) == READ) {
		void *kaddr = kmap_atomic(bh->b_page);
		memset(kaddr + bh_offset(bh) + bytes, 0, bh->b_size - bytes);
		kunmap_atomic(kaddr);
	}
}

/*
 * Wrap a single locked, mapped buffer_head in a one-segment bio and
 * submit it; b_end_io runs from end_bio_bh_io_sync() on completion.
 */
int submit_bh(int rw, struct buffer_head * bh)
{
	struct bio *bio;
	int ret = 0;

	BUG_ON(!buffer_locked(bh));
	BUG_ON(!buffer_mapped(bh));
	BUG_ON(!bh->b_end_io);
	BUG_ON(buffer_delay(bh));
	BUG_ON(buffer_unwritten(bh));

	/*
	 * Only clear out a write error when rewriting
	 */
	if (test_set_buffer_req(bh) && (rw & WRITE))
		clear_buffer_write_io_error(bh);

	/*
	 * from here on down, it's all bio -- do the initial mapping,
	 * release our buffer and start the IO
	 */
	bio = bio_alloc(GFP_NOIO, 1);

	bio->bi_sector = bh->b_blocknr * (bh->b_size >> 9);
	bio->bi_bdev = bh->b_bdev;
	bio->bi_io_vec[0].bv_page = bh->b_page;
	bio->bi_io_vec[0].bv_len = bh->b_size;
	bio->bi_io_vec[0].bv_offset = bh_offset(bh);

	bio->bi_vcnt = 1;
	bio->bi_idx = 0;
	bio->bi_size = bh->b_size;

	bio->bi_end_io = end_bio_bh_io_sync;
	bio->bi_private = bh;

	/* Take care of bh's that straddle the end of the device */
	guard_bh_eod(rw, bio, bh);

	bio_get(bio);
	submit_bio(rw, bio);

	if (bio_flagged(bio, BIO_EOPNOTSUPP))
		ret = -EOPNOTSUPP;

	bio_put(bio);
	return ret;
}
EXPORT_SYMBOL(submit_bh);

/**
 * ll_rw_block: low-level access to block devices (DEPRECATED)
 * @rw: whether to %READ or %WRITE or maybe %READA (readahead)
 * @nr: number of &struct buffer_heads in the array
 * @bhs: array of pointers to &struct buffer_head
 *
 * ll_rw_block() takes an array of pointers to &struct buffer_heads, and
 * requests an I/O operation on them, either a %READ or a %WRITE.  The third
 * %READA option is described in the documentation for generic_make_request()
 * which ll_rw_block() calls.
 *
 * This function drops any buffer that it cannot get a lock on (with the
 * BH_Lock state bit), any buffer that appears to be clean when doing a write
 * request, and any buffer that appears to be up-to-date when doing read
 * request.  Further it marks as clean buffers that are processed for
 * writing (the buffer cache won't assume that they are actually clean
 * until the buffer gets unlocked).
 *
 * ll_rw_block sets b_end_io to a simple completion handler that marks
 * the buffer up-to-date (if appropriate), unlocks the buffer and wakes
 * any waiters.
 */
void ll_rw_block(int rw, int nr, struct buffer_head *bhs[])
{
	int i;

	for (i = 0; i < nr; i++) {
		struct buffer_head *bh = bhs[i];

		if (!trylock_buffer(bh))
			continue;
		if (rw == WRITE) {
			if (test_clear_buffer_dirty(bh)) {
				bh->b_end_io = end_buffer_write_sync;
				get_bh(bh);
				submit_bh(WRITE, bh);
				continue;
			}
		} else {
			if (!buffer_uptodate(bh)) {
				bh->b_end_io = end_buffer_read_sync;
				get_bh(bh);
				submit_bh(rw, bh);
				continue;
			}
		}
		unlock_buffer(bh);
	}
}
EXPORT_SYMBOL(ll_rw_block);
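
/*
 * Example (illustrative): reading one block now while starting another
 * as readahead; ll_rw_block() itself never blocks, so only the buffer
 * we actually need is waited on:
 *
 *	ll_rw_block(READ, 2, bhs);
 *	wait_on_buffer(bhs[0]);
 *	if (!buffer_uptodate(bhs[0]))
 *		goto io_error;
 */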

/*
 * Start an asynchronous write of a single buffer, but only if it is
 * dirty; a buffer found clean after locking is simply unlocked again.
 */
void write_dirty_buffer(struct buffer_head *bh, int rw)
{
	lock_buffer(bh);
	if (!test_clear_buffer_dirty(bh)) {
		unlock_buffer(bh);
		return;
	}
	bh->b_end_io = end_buffer_write_sync;
	get_bh(bh);
	submit_bh(rw, bh);
}
EXPORT_SYMBOL(write_dirty_buffer);

/*
 * For a data-integrity writeout, we need to wait upon any in-progress I/O
 * and then start new I/O and then wait upon it.  The caller must have a ref
 * on the buffer_head.
 */
int __sync_dirty_buffer(struct buffer_head *bh, int rw)
{
	int ret = 0;

	WARN_ON(atomic_read(&bh->b_count) < 1);
	lock_buffer(bh);
	if (test_clear_buffer_dirty(bh)) {
		get_bh(bh);
		bh->b_end_io = end_buffer_write_sync;
		ret = submit_bh(rw, bh);
		wait_on_buffer(bh);
		if (!ret && !buffer_uptodate(bh))
			ret = -EIO;
	} else {
		unlock_buffer(bh);
	}
	return ret;
}
EXPORT_SYMBOL(__sync_dirty_buffer);
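
/*
 * Example (illustrative): a data-integrity update of a metadata buffer
 * built on the helper above; the caller must hold a reference on bh:
 *
 *	memcpy(bh->b_data + off, src, len);
 *	mark_buffer_dirty(bh);
 *	err = __sync_dirty_buffer(bh, WRITE_SYNC);
 *
 * lock_buffer() inside waits out any write already in flight before a
 * new one is submitted.
 */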

int sync_dirty_buffer(struct buffer_head *bh)
{
	return __sync_dirty_buffer(bh, WRITE_SYNC);
}
EXPORT_SYMBOL(sync_dirty_buffer);

/*
 * try_to_free_buffers() checks if all the buffers on this particular page
 * are unused, and releases them if so.
 *
 * Exclusion against try_to_free_buffers may be obtained by either
 * locking the page or by holding its mapping's private_lock.
 *
 * If the page is dirty but all the buffers are clean then we need to
 * be sure to mark the page clean as well.  This is because the page
 * may be against a block device, and a later reattachment of buffers
 * to a dirty page will set *all* buffers dirty.
 */
static inline int buffer_busy(struct buffer_head *bh)
{
	return atomic_read(&bh->b_count) |
		(bh->b_state & ((1 << BH_Dirty) | (1 << BH_Lock)));
}

static int
drop_buffers(struct page *page, struct buffer_head **buffers_to_free)
{
	struct buffer_head *head = page_buffers(page);
	struct buffer_head *bh;

	bh = head;
	do {
		if (buffer_write_io_error(bh) && page->mapping)
			set_bit(AS_EIO, &page->mapping->flags);
		if (buffer_busy(bh))
			goto failed;
		bh = bh->b_this_page;
	} while (bh != head);

	do {
		struct buffer_head *next = bh->b_this_page;

		if (bh->b_assoc_map)
			__remove_assoc_queue(bh);
		bh = next;
	} while (bh != head);
	*buffers_to_free = head;
	__clear_page_buffers(page);
	return 1;
failed:
	return 0;
}

int try_to_free_buffers(struct page *page)
{
	struct address_space * const mapping = page->mapping;
	struct buffer_head *buffers_to_free = NULL;
	int ret = 0;

	BUG_ON(!PageLocked(page));
	if (PageWriteback(page))
		return 0;

	if (mapping == NULL) {
		ret = drop_buffers(page, &buffers_to_free);
		goto out;
	}

	spin_lock(&mapping->private_lock);
	ret = drop_buffers(page, &buffers_to_free);

	/*
	 * If the filesystem writes its buffers by hand (eg ext3)
	 * then we can have clean buffers against a dirty page.  We
	 * clean the page here; otherwise the VM will never notice
	 * that the filesystem did any IO at all.
	 *
	 * Also, during truncate, discard_buffer will have marked all
	 * the page's buffers clean.  We discover that here and clean
	 * the page also.
	 *
	 * private_lock must be held over this entire operation in order
	 * to synchronise against __set_page_dirty_buffers and prevent the
	 * dirty bit from being lost.
	 */
	if (ret)
		cancel_dirty_page(page, PAGE_CACHE_SIZE);
	spin_unlock(&mapping->private_lock);
out:
	if (buffers_to_free) {
		struct buffer_head *bh = buffers_to_free;

		do {
			struct buffer_head *next = bh->b_this_page;
			free_buffer_head(bh);
			bh = next;
		} while (bh != buffers_to_free);
	}
	return ret;
}
EXPORT_SYMBOL(try_to_free_buffers);

/*
 * There are no bdflush tunables left.  But distributions are still
 * running obsolete flush daemons, so we terminate them here.
 *
 * Use of bdflush() is deprecated and will be removed in a future kernel.
 * The `flush-X' kernel threads fully replace bdflush daemons and this call.
 */
SYSCALL_DEFINE2(bdflush, int, func, long, data)
{
	static int msg_count;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	if (msg_count < 5) {
		msg_count++;
		printk(KERN_INFO
			"warning: process `%s' used the obsolete bdflush"
			" system call\n", current->comm);
		printk(KERN_INFO "Fix your initscripts?\n");
	}

	if (func == 1)
		do_exit(0);
	return 0;
}

/*
 * Buffer-head allocation
 */
static struct kmem_cache *bh_cachep __read_mostly;

/*
 * Once the number of bh's in the machine exceeds this level, we start
 * stripping them in writeback.
 */
static int max_buffer_heads;

int buffer_heads_over_limit;

struct bh_accounting {
	int nr;			/* Number of live bh's */
	int ratelimit;		/* Limit cacheline bouncing */
};

static DEFINE_PER_CPU(struct bh_accounting, bh_accounting) = {0, 0};

static void recalc_bh_state(void)
{
	int i;
	int tot = 0;

	if (__this_cpu_inc_return(bh_accounting.ratelimit) - 1 < 4096)
		return;
	__this_cpu_write(bh_accounting.ratelimit, 0);
	for_each_online_cpu(i)
		tot += per_cpu(bh_accounting, i).nr;
	buffer_heads_over_limit = (tot > max_buffer_heads);
}

struct buffer_head *alloc_buffer_head(gfp_t gfp_flags)
{
	struct buffer_head *ret = kmem_cache_zalloc(bh_cachep, gfp_flags);
	if (ret) {
		INIT_LIST_HEAD(&ret->b_assoc_buffers);
		preempt_disable();
		__this_cpu_inc(bh_accounting.nr);
		recalc_bh_state();
		preempt_enable();
	}
	return ret;
}
EXPORT_SYMBOL(alloc_buffer_head);
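
/*
 * Illustrative note: a caller in filesystem or reclaim context would
 * normally pass a mask such as GFP_NOFS here, e.g.
 *
 *	bh = alloc_buffer_head(GFP_NOFS);
 *
 * so the allocation cannot recurse back into the filesystem; the head
 * is returned with free_buffer_head() below.
 */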

void free_buffer_head(struct buffer_head *bh)
{
	BUG_ON(!list_empty(&bh->b_assoc_buffers));
	kmem_cache_free(bh_cachep, bh);
	preempt_disable();
	__this_cpu_dec(bh_accounting.nr);
	recalc_bh_state();
	preempt_enable();
}
EXPORT_SYMBOL(free_buffer_head);

/*
 * When a CPU goes away, drop its per-cpu buffer_head LRU and fold its
 * buffer_head count into the accounting of the CPU running the notifier.
 */
static void buffer_exit_cpu(int cpu)
{
	int i;
	struct bh_lru *b = &per_cpu(bh_lrus, cpu);

	for (i = 0; i < BH_LRU_SIZE; i++) {
		brelse(b->bhs[i]);
		b->bhs[i] = NULL;
	}
	this_cpu_add(bh_accounting.nr, per_cpu(bh_accounting, cpu).nr);
	per_cpu(bh_accounting, cpu).nr = 0;
}

static int buffer_cpu_notify(struct notifier_block *self,
			     unsigned long action, void *hcpu)
{
	if (action == CPU_DEAD || action == CPU_DEAD_FROZEN)
		buffer_exit_cpu((unsigned long)hcpu);
	return NOTIFY_OK;
}

/**
 * bh_uptodate_or_lock - Test whether the buffer is uptodate
 * @bh: struct buffer_head
 *
 * Return true if the buffer is up-to-date and false,
 * with the buffer locked, if not.
 */
int bh_uptodate_or_lock(struct buffer_head *bh)
{
	if (!buffer_uptodate(bh)) {
		lock_buffer(bh);
		if (!buffer_uptodate(bh))
			return 0;
		unlock_buffer(bh);
	}
	return 1;
}
EXPORT_SYMBOL(bh_uptodate_or_lock);

/**
 * bh_submit_read - Submit a locked buffer for reading
 * @bh: struct buffer_head
 *
 * Returns zero on success and -EIO on error.
 */
int bh_submit_read(struct buffer_head *bh)
{
	BUG_ON(!buffer_locked(bh));

	if (buffer_uptodate(bh)) {
		unlock_buffer(bh);
		return 0;
	}

	get_bh(bh);
	bh->b_end_io = end_buffer_read_sync;
	submit_bh(READ, bh);
	wait_on_buffer(bh);
	if (buffer_uptodate(bh))
		return 0;
	return -EIO;
}
EXPORT_SYMBOL(bh_submit_read);
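
/*
 * Example (illustrative): the two helpers above are designed to pair
 * up when reading metadata that is usually already cached:
 *
 *	if (!bh_uptodate_or_lock(bh)) {
 *		if (bh_submit_read(bh))
 *			goto io_error;
 *	}
 *
 * On the fast path no lock is taken; on the slow path bh_submit_read()
 * consumes the lock that bh_uptodate_or_lock() left held.
 */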

void __init buffer_init(void)
{
	unsigned long nrpages;

	bh_cachep = kmem_cache_create("buffer_head",
			sizeof(struct buffer_head), 0,
				(SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|
				SLAB_MEM_SPREAD),
				NULL);

	/*
	 * Limit the bh occupancy to 10% of ZONE_NORMAL
	 */
	nrpages = (nr_free_buffer_pages() * 10) / 100;
	max_buffer_heads = nrpages * (PAGE_SIZE / sizeof(struct buffer_head));
	hotcpu_notifier(buffer_cpu_notify, 0);
}