/*
 *  linux/fs/buffer.c
 *
 *  Copyright (C) 1991, 1992, 2002  Linus Torvalds
 */

#include <linux/kernel.h>
#include <linux/syscalls.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/percpu.h>
#include <linux/slab.h>
#include <linux/capability.h>
#include <linux/blkdev.h>
#include <linux/file.h>
#include <linux/quotaops.h>
#include <linux/highmem.h>
#include <linux/export.h>
#include <linux/backing-dev.h>
#include <linux/writeback.h>
#include <linux/hash.h>
#include <linux/suspend.h>
#include <linux/buffer_head.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/bio.h>
#include <linux/notifier.h>
#include <linux/cpu.h>
#include <linux/bitops.h>
#include <linux/mpage.h>
#include <linux/bit_spinlock.h>
#include <trace/events/block.h>

static int fsync_buffers_list(spinlock_t *lock, struct list_head *list);
static int submit_bh_wbc(int rw, struct buffer_head *bh,
			 unsigned long bio_flags,
			 struct writeback_control *wbc);

#define BH_ENTRY(list) list_entry((list), struct buffer_head, b_assoc_buffers)

void init_buffer(struct buffer_head *bh, bh_end_io_t *handler, void *private)
{
	bh->b_end_io = handler;
	bh->b_private = private;
}
EXPORT_SYMBOL(init_buffer);

inline void touch_buffer(struct buffer_head *bh)
{
	trace_block_touch_buffer(bh);
	mark_page_accessed(bh->b_page);
}
EXPORT_SYMBOL(touch_buffer);

void __lock_buffer(struct buffer_head *bh)
{
	wait_on_bit_lock_io(&bh->b_state, BH_Lock, TASK_UNINTERRUPTIBLE);
}
EXPORT_SYMBOL(__lock_buffer);

void unlock_buffer(struct buffer_head *bh)
{
	clear_bit_unlock(BH_Lock, &bh->b_state);
	smp_mb__after_atomic();
	wake_up_bit(&bh->b_state, BH_Lock);
}
EXPORT_SYMBOL(unlock_buffer);

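/*
 * Returns if the page has dirty or writeback buffers. If all the buffers
 * are unlocked and clean then the PageDirty information is stale. If
 * any of the pages are locked, it is assumed they are locked for IO.
 */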
void buffer_check_dirty_writeback(struct page *page,
				     bool *dirty, bool *writeback)
{
	struct buffer_head *head, *bh;
	*dirty = false;
	*writeback = false;

	BUG_ON(!PageLocked(page));

	if (!page_has_buffers(page))
		return;

	if (PageWriteback(page))
		*writeback = true;

	head = page_buffers(page);
	bh = head;
	do {
		if (buffer_locked(bh))
			*writeback = true;

		if (buffer_dirty(bh))
			*dirty = true;

		bh = bh->b_this_page;
	} while (bh != head);
}
EXPORT_SYMBOL(buffer_check_dirty_writeback);

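/*
 * Block until a buffer comes unlocked.  This doesn't stop it
 * from being re-locked - that's the caller's job.
 */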
void __wait_on_buffer(struct buffer_head * bh)
{
	wait_on_bit_io(&bh->b_state, BH_Lock, TASK_UNINTERRUPTIBLE);
}
EXPORT_SYMBOL(__wait_on_buffer);

static void
__clear_page_buffers(struct page *page)
{
	ClearPagePrivate(page);
	set_page_private(page, 0);
	put_page(page);
}

static void buffer_io_error(struct buffer_head *bh, char *msg)
{
	if (!test_bit(BH_Quiet, &bh->b_state))
		printk_ratelimited(KERN_ERR
			"Buffer I/O error on dev %pg, logical block %llu%s\n",
			bh->b_bdev, (unsigned long long)bh->b_blocknr, msg);
}

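/*
 * End-of-IO handler helper function which does not touch the bh after
 * unlocking it.
 * Note: unlock_buffer() sort-of touches the bh after unlocking it, but
 * a race there is benign: unlock_buffer() only uses the bh's address for
 * hashing after unlocking the buffer, so it doesn't actually touch the bh
 * itself.
 */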
static void __end_buffer_read_notouch(struct buffer_head *bh, int uptodate)
{
	if (uptodate) {
		set_buffer_uptodate(bh);
	} else {
		clear_buffer_uptodate(bh);
	}
	unlock_buffer(bh);
}

void end_buffer_read_sync(struct buffer_head *bh, int uptodate)
{
	__end_buffer_read_notouch(bh, uptodate);
	put_bh(bh);
}
EXPORT_SYMBOL(end_buffer_read_sync);

void end_buffer_write_sync(struct buffer_head *bh, int uptodate)
{
	if (uptodate) {
		set_buffer_uptodate(bh);
	} else {
		buffer_io_error(bh, ", lost sync page write");
		set_buffer_write_io_error(bh);
		clear_buffer_uptodate(bh);
	}
	unlock_buffer(bh);
	put_bh(bh);
}
EXPORT_SYMBOL(end_buffer_write_sync);

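/*
 * Various filesystems appear to want __find_get_block to be non-blocking.
 * But it's the page lock which protects the buffers.  To get around this,
 * we get exclusion from try_to_free_buffers with the blockdev mapping's
 * private_lock.
 *
 * Hack idea: for the blockdev mapping, private_lock contention
 * may be quite high.  This code could TryLock the page, and if that
 * succeeds, there is no need to take private_lock.
 */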
static struct buffer_head *
__find_get_block_slow(struct block_device *bdev, sector_t block)
{
	struct inode *bd_inode = bdev->bd_inode;
	struct address_space *bd_mapping = bd_inode->i_mapping;
	struct buffer_head *ret = NULL;
	pgoff_t index;
	struct buffer_head *bh;
	struct buffer_head *head;
	struct page *page;
	int all_mapped = 1;

	index = block >> (PAGE_SHIFT - bd_inode->i_blkbits);
	page = find_get_page_flags(bd_mapping, index, FGP_ACCESSED);
	if (!page)
		goto out;

	spin_lock(&bd_mapping->private_lock);
	if (!page_has_buffers(page))
		goto out_unlock;
	head = page_buffers(page);
	bh = head;
	do {
		if (!buffer_mapped(bh))
			all_mapped = 0;
		else if (bh->b_blocknr == block) {
			ret = bh;
			get_bh(bh);
			goto out_unlock;
		}
		bh = bh->b_this_page;
	} while (bh != head);

	if (all_mapped) {
		printk("__find_get_block_slow() failed. "
			"block=%llu, b_blocknr=%llu\n",
			(unsigned long long)block,
			(unsigned long long)bh->b_blocknr);
		printk("b_state=0x%08lx, b_size=%zu\n",
			bh->b_state, bh->b_size);
		printk("device %pg blocksize: %d\n", bdev,
			1 << bd_inode->i_blkbits);
	}
out_unlock:
	spin_unlock(&bd_mapping->private_lock);
	put_page(page);
out:
	return ret;
}

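/*
 * Kick the writeback threads then try to free up some ZONE_NORMAL memory.
 */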
static void free_more_memory(void)
{
	struct zone *zone;
	int nid;

	wakeup_flusher_threads(1024, WB_REASON_FREE_MORE_MEM);
	yield();

	for_each_online_node(nid) {
		(void)first_zones_zonelist(node_zonelist(nid, GFP_NOFS),
						gfp_zone(GFP_NOFS), NULL,
						&zone);
		if (zone)
			try_to_free_pages(node_zonelist(nid, GFP_NOFS), 0,
						GFP_NOFS, NULL);
	}
}

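/*
 * I/O completion handler for block_read_full_page() - pages
 * which come unlocked at the end of I/O.
 */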
static void end_buffer_async_read(struct buffer_head *bh, int uptodate)
{
	unsigned long flags;
	struct buffer_head *first;
	struct buffer_head *tmp;
	struct page *page;
	int page_uptodate = 1;

	BUG_ON(!buffer_async_read(bh));

	page = bh->b_page;
	if (uptodate) {
		set_buffer_uptodate(bh);
	} else {
		clear_buffer_uptodate(bh);
		buffer_io_error(bh, ", async page read");
		SetPageError(page);
	}

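	/*
	 * Be _very_ careful from here on. Bad things can happen if
	 * two buffer heads end IO at almost the same time and both
	 * decide that the page is now completely done.
	 */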
	first = page_buffers(page);
	local_irq_save(flags);
	bit_spin_lock(BH_Uptodate_Lock, &first->b_state);
	clear_buffer_async_read(bh);
	unlock_buffer(bh);
	tmp = bh;
	do {
		if (!buffer_uptodate(tmp))
			page_uptodate = 0;
		if (buffer_async_read(tmp)) {
			BUG_ON(!buffer_locked(tmp));
			goto still_busy;
		}
		tmp = tmp->b_this_page;
	} while (tmp != bh);
	bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
	local_irq_restore(flags);

	if (page_uptodate && !PageError(page))
		SetPageUptodate(page);
	unlock_page(page);
	return;

still_busy:
	bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
	local_irq_restore(flags);
	return;
}

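/*
 * Completion handler for block_write_full_page() - pages which are unlocked
 * during I/O, and which have PageWriteback cleared upon I/O completion.
 */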
void end_buffer_async_write(struct buffer_head *bh, int uptodate)
{
	unsigned long flags;
	struct buffer_head *first;
	struct buffer_head *tmp;
	struct page *page;

	BUG_ON(!buffer_async_write(bh));

	page = bh->b_page;
	if (uptodate) {
		set_buffer_uptodate(bh);
	} else {
		buffer_io_error(bh, ", lost async page write");
		set_bit(AS_EIO, &page->mapping->flags);
		set_buffer_write_io_error(bh);
		clear_buffer_uptodate(bh);
		SetPageError(page);
	}

	first = page_buffers(page);
	local_irq_save(flags);
	bit_spin_lock(BH_Uptodate_Lock, &first->b_state);

	clear_buffer_async_write(bh);
	unlock_buffer(bh);
	tmp = bh->b_this_page;
	while (tmp != bh) {
		if (buffer_async_write(tmp)) {
			BUG_ON(!buffer_locked(tmp));
			goto still_busy;
		}
		tmp = tmp->b_this_page;
	}
	bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
	local_irq_restore(flags);
	end_page_writeback(page);
	return;

still_busy:
	bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
	local_irq_restore(flags);
	return;
}
EXPORT_SYMBOL(end_buffer_async_write);

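/*
 * If a page's buffers are under async readin (end_buffer_async_read
 * completion) then there is a possibility that another thread of
 * control could lock one of the buffers after it has completed
 * but while some of the other buffers have not completed.  This
 * locked buffer would confuse end_buffer_async_read() into not unlocking
 * the page.  So the absence of BH_Async_Read tells end_buffer_async_read()
 * that this buffer is not under async I/O.
 *
 * The page comes unlocked when it has no locked buffer_async buffers
 * left.
 *
 * PageLocked prevents anyone starting new async I/O reads any of
 * the buffers.
 *
 * PageWriteback is used to prevent simultaneous writeout of the same
 * page.
 *
 * PageLocked prevents anyone from starting writeback of a page which is
 * under read I/O (PageWriteback is only ever set against a locked page).
 */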
static void mark_buffer_async_read(struct buffer_head *bh)
{
	bh->b_end_io = end_buffer_async_read;
	set_buffer_async_read(bh);
}

static void mark_buffer_async_write_endio(struct buffer_head *bh,
					  bh_end_io_t *handler)
{
	bh->b_end_io = handler;
	set_buffer_async_write(bh);
}

void mark_buffer_async_write(struct buffer_head *bh)
{
	mark_buffer_async_write_endio(bh, end_buffer_async_write);
}
EXPORT_SYMBOL(mark_buffer_async_write);

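/*
 * fs/buffer.c contains helper functions for buffer-backed address space's
 * fsync functions.  A common requirement for buffer-based filesystems is
 * that certain data from the backing blockdev needs to be written out for
 * a successful fsync().  For example, ext2 indirect blocks need to be
 * written back and waited upon before fsync() returns.
 *
 * The functions mark_buffer_dirty_inode(), fsync_inode_buffers(),
 * inode_has_buffers() and invalidate_inode_buffers() are provided for the
 * management of a list of dependent buffers at ->i_mapping->private_list.
 *
 * Locking is a little subtle: try_to_free_buffers() will remove buffers
 * from their controlling inode's queue when they are being freed.  But
 * try_to_free_buffers() will be operating against the *blockdev* mapping
 * at the time, not against the S_ISREG file which depends on those buffers.
 * So the locking for private_list is via the private_lock in the address_space
 * which backs the buffers.  Which is different from the address_space
 * against which the buffers are listed.  So for a particular address_space,
 * mapping->private_lock does *not* protect mapping->private_list!  In fact,
 * mapping->private_list will always be protected by the backing blockdev's
 * ->private_lock.
 *
 * Which introduces a requirement: all buffers on an address_space's
 * ->private_list must be from the same address_space: the blockdev's.
 */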
static void __remove_assoc_queue(struct buffer_head *bh)
{
	list_del_init(&bh->b_assoc_buffers);
	WARN_ON(!bh->b_assoc_map);
	if (buffer_write_io_error(bh))
		set_bit(AS_EIO, &bh->b_assoc_map->flags);
	bh->b_assoc_map = NULL;
}

int inode_has_buffers(struct inode *inode)
{
	return !list_empty(&inode->i_data.private_list);
}

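/*
 * osync is designed to support O_SYNC io.  It waits synchronously for
 * all already-submitted IO to complete, but does not queue any new
 * writes to the disk.
 *
 * To do O_SYNC writes, just queue the buffer writes with ll_rw_block as
 * you dirty the buffers, and then use osync_inode_buffers to wait for
 * completion.  Any other dirty buffers which are not yet queued for
 * write will not be flushed to disk by the osync.
 */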
static int osync_buffers_list(spinlock_t *lock, struct list_head *list)
{
	struct buffer_head *bh;
	struct list_head *p;
	int err = 0;

	spin_lock(lock);
repeat:
	list_for_each_prev(p, list) {
		bh = BH_ENTRY(p);
		if (buffer_locked(bh)) {
			get_bh(bh);
			spin_unlock(lock);
			wait_on_buffer(bh);
			if (!buffer_uptodate(bh))
				err = -EIO;
			brelse(bh);
			spin_lock(lock);
			goto repeat;
		}
	}
	spin_unlock(lock);
	return err;
}

static void do_thaw_one(struct super_block *sb, void *unused)
{
	while (sb->s_bdev && !thaw_bdev(sb->s_bdev, sb))
		printk(KERN_WARNING "Emergency Thaw on %pg\n", sb->s_bdev);
}

static void do_thaw_all(struct work_struct *work)
{
	iterate_supers(do_thaw_one, NULL);
	kfree(work);
	printk(KERN_WARNING "Emergency Thaw complete\n");
}

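/**
 * emergency_thaw_all -- forcibly thaw every frozen filesystem
 *
 * Used for emergency unfreeze of all filesystems via SysRq
 */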
void emergency_thaw_all(void)
{
	struct work_struct *work;

	work = kmalloc(sizeof(*work), GFP_ATOMIC);
	if (work) {
		INIT_WORK(work, do_thaw_all);
		schedule_work(work);
	}
}

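/**
 * sync_mapping_buffers - write out & wait upon a mapping's "associated" buffers
 * @mapping: the mapping which wants those buffers written
 *
 * Starts I/O against the buffers at mapping->private_list, and waits upon
 * that I/O.
 *
 * Basically, this is a convenience function for fsync().
 * @mapping is a file or directory which needs those buffers to be written for
 * a successful fsync().
 */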
int sync_mapping_buffers(struct address_space *mapping)
{
	struct address_space *buffer_mapping = mapping->private_data;

	if (buffer_mapping == NULL || list_empty(&mapping->private_list))
		return 0;

	return fsync_buffers_list(&buffer_mapping->private_lock,
					&mapping->private_list);
}
EXPORT_SYMBOL(sync_mapping_buffers);

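/*
 * Called when we've recently written block `bblock', and it is known that
 * `bblock' was for a buffer_boundary() buffer.  This means that the block at
 * `bblock + 1' is probably a dirty indirect block.  Hunt it down and, if it's
 * dirty, schedule it for IO.  So that indirects merge nicely with their data.
 */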
void write_boundary_block(struct block_device *bdev,
			sector_t bblock, unsigned blocksize)
{
	struct buffer_head *bh = __find_get_block(bdev, bblock + 1, blocksize);
	if (bh) {
		if (buffer_dirty(bh))
			ll_rw_block(WRITE, 1, &bh);
		put_bh(bh);
	}
}

void mark_buffer_dirty_inode(struct buffer_head *bh, struct inode *inode)
{
	struct address_space *mapping = inode->i_mapping;
	struct address_space *buffer_mapping = bh->b_page->mapping;

	mark_buffer_dirty(bh);
	if (!mapping->private_data) {
		mapping->private_data = buffer_mapping;
	} else {
		BUG_ON(mapping->private_data != buffer_mapping);
	}
	if (!bh->b_assoc_map) {
		spin_lock(&buffer_mapping->private_lock);
		list_move_tail(&bh->b_assoc_buffers,
				&mapping->private_list);
		bh->b_assoc_map = mapping;
		spin_unlock(&buffer_mapping->private_lock);
	}
}
EXPORT_SYMBOL(mark_buffer_dirty_inode);

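/*
 * Mark the page dirty, and set it dirty in the radix tree, and mark the inode
 * dirty.
 *
 * If warn is true, then emit a warning if the page is not uptodate and has
 * not been truncated.
 *
 * The caller must hold lock_page_memcg().
 */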
static void __set_page_dirty(struct page *page, struct address_space *mapping,
			     int warn)
{
	unsigned long flags;

	spin_lock_irqsave(&mapping->tree_lock, flags);
	if (page->mapping) {
		WARN_ON_ONCE(warn && !PageUptodate(page));
		account_page_dirtied(page, mapping);
		radix_tree_tag_set(&mapping->page_tree,
				page_index(page), PAGECACHE_TAG_DIRTY);
	}
	spin_unlock_irqrestore(&mapping->tree_lock, flags);
}

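/*
 * Add a page to the dirty page list.
 *
 * It is a sad fact of life that this function is called from several places
 * deeply under spinlocking.  It may not sleep.
 *
 * If the page has buffers, the uptodate buffers are set dirty, to preserve
 * dirty-state coherency between the page and the buffers.  It the page does
 * not have buffers then when they are later attached they will all be set
 * dirty.
 *
 * The buffers are dirtied before the page is dirtied.  There's a small race
 * window in which a writepage caller may see the page cleanness but not the
 * buffer dirtiness.  That's fine.  If this code were to set the page dirty
 * before the buffers, a concurrent writepage caller could clear the page
 * dirty bit, see a bunch of clean buffers and we'd end up with dirty
 * buffers/clean page on the dirty page list.
 *
 * We use private_lock to lock against try_to_free_buffers while using the
 * page's buffer list.  Also use this to protect against clean buffers being
 * added to the page after it was set dirty.
 */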
int __set_page_dirty_buffers(struct page *page)
{
	int newly_dirty;
	struct address_space *mapping = page_mapping(page);

	if (unlikely(!mapping))
		return !TestSetPageDirty(page);

	spin_lock(&mapping->private_lock);
	if (page_has_buffers(page)) {
		struct buffer_head *head = page_buffers(page);
		struct buffer_head *bh = head;

		do {
			set_buffer_dirty(bh);
			bh = bh->b_this_page;
		} while (bh != head);
	}

	lock_page_memcg(page);
	newly_dirty = !TestSetPageDirty(page);
	spin_unlock(&mapping->private_lock);

	if (newly_dirty)
		__set_page_dirty(page, mapping, 1);

	unlock_page_memcg(page);

	if (newly_dirty)
		__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);

	return newly_dirty;
}
EXPORT_SYMBOL(__set_page_dirty_buffers);

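/*
 * Write out and wait upon a list of buffers.
 *
 * We have conflicting pressures: we want to make sure that all
 * initially dirty buffers get waited on, but that any subsequently
 * dirtied buffers don't.  After all, we don't want fsync to last
 * forever if somebody is actively writing to the file.
 *
 * Do this in two main stages: first we copy dirty buffers to a
 * temporary inode list, queueing the writes as we go.  Then we clean
 * up, waiting for those writes to complete.  During this second stage,
 * any subsequent dirtyings, markings etc will occur against the inode's
 * original list, so they won't be waited upon.
 */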
static int fsync_buffers_list(spinlock_t *lock, struct list_head *list)
{
	struct buffer_head *bh;
	struct list_head tmp;
	struct address_space *mapping;
	int err = 0, err2;
	struct blk_plug plug;

	INIT_LIST_HEAD(&tmp);
	blk_start_plug(&plug);

	spin_lock(lock);
	while (!list_empty(list)) {
		bh = BH_ENTRY(list->next);
		mapping = bh->b_assoc_map;
		__remove_assoc_queue(bh);

		smp_mb();
		if (buffer_dirty(bh) || buffer_locked(bh)) {
			list_add(&bh->b_assoc_buffers, &tmp);
			bh->b_assoc_map = mapping;
			if (buffer_dirty(bh)) {
				get_bh(bh);
				spin_unlock(lock);

				write_dirty_buffer(bh, WRITE_SYNC);

				brelse(bh);
				spin_lock(lock);
			}
		}
	}

	spin_unlock(lock);
	blk_finish_plug(&plug);
	spin_lock(lock);

	while (!list_empty(&tmp)) {
		bh = BH_ENTRY(tmp.prev);
		get_bh(bh);
		mapping = bh->b_assoc_map;
		__remove_assoc_queue(bh);

		smp_mb();
		if (buffer_dirty(bh)) {
			list_add(&bh->b_assoc_buffers,
				 &mapping->private_list);
			bh->b_assoc_map = mapping;
		}
		spin_unlock(lock);
		wait_on_buffer(bh);
		if (!buffer_uptodate(bh))
			err = -EIO;
		brelse(bh);
		spin_lock(lock);
	}

	spin_unlock(lock);
	err2 = osync_buffers_list(lock, list);
	if (err)
		return err;
	else
		return err2;
}

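/**
 * invalidate_inode_buffers - invalidate dirty buffers hanging off the inode
 * @inode: inode to invalidate
 *
 * Invalidate any and all dirty buffers on a given inode.  We are
 * probably unmounting the fs, but that doesn't mean we have already
 * done a sync().  Just drop the buffers from the inode list.
 *
 * NOTE: we take the inode's blockdev's mapping's private_lock.  Which
 * assumes that all the buffers are against the blockdev.
 */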
void invalidate_inode_buffers(struct inode *inode)
{
	if (inode_has_buffers(inode)) {
		struct address_space *mapping = &inode->i_data;
		struct list_head *list = &mapping->private_list;
		struct address_space *buffer_mapping = mapping->private_data;

		spin_lock(&buffer_mapping->private_lock);
		while (!list_empty(list))
			__remove_assoc_queue(BH_ENTRY(list->next));
		spin_unlock(&buffer_mapping->private_lock);
	}
}
EXPORT_SYMBOL(invalidate_inode_buffers);

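/*
 * Remove any clean buffers from the inode's buffer list.  This is called
 * when we're trying to free the inode itself.  Those buffers can pin it.
 *
 * Returns true if all buffers were removed.
 */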
int remove_inode_buffers(struct inode *inode)
{
	int ret = 1;

	if (inode_has_buffers(inode)) {
		struct address_space *mapping = &inode->i_data;
		struct list_head *list = &mapping->private_list;
		struct address_space *buffer_mapping = mapping->private_data;

		spin_lock(&buffer_mapping->private_lock);
		while (!list_empty(list)) {
			struct buffer_head *bh = BH_ENTRY(list->next);
			if (buffer_dirty(bh)) {
				ret = 0;
				break;
			}
			__remove_assoc_queue(bh);
		}
		spin_unlock(&buffer_mapping->private_lock);
	}
	return ret;
}

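/*
 * Create the appropriate buffers when given a page for data area and
 * the size of each buffer.. Use the bh->b_this_page linked list to
 * follow the buffers created.  Return NULL if unable to create more
 * buffers.
 *
 * The retry flag is used to differentiate async IO (paging, swapping)
 * which may not fail from ordinary buffer allocations.
 */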
struct buffer_head *alloc_page_buffers(struct page *page, unsigned long size,
		int retry)
{
	struct buffer_head *bh, *head;
	long offset;

try_again:
	head = NULL;
	offset = PAGE_SIZE;
	while ((offset -= size) >= 0) {
		bh = alloc_buffer_head(GFP_NOFS);
		if (!bh)
			goto no_grow;

		bh->b_this_page = head;
		bh->b_blocknr = -1;
		head = bh;

		bh->b_size = size;

		set_bh_page(bh, page, offset);
	}
	return head;

no_grow:
	if (head) {
		do {
			bh = head;
			head = head->b_this_page;
			free_buffer_head(bh);
		} while (head);
	}

	if (!retry)
		return NULL;

	free_more_memory();
	goto try_again;
}
EXPORT_SYMBOL_GPL(alloc_page_buffers);

static inline void
link_dev_buffers(struct page *page, struct buffer_head *head)
{
	struct buffer_head *bh, *tail;

	bh = head;
	do {
		tail = bh;
		bh = bh->b_this_page;
	} while (bh);
	tail->b_this_page = head;
	attach_page_buffers(page, head);
}

static sector_t blkdev_max_block(struct block_device *bdev, unsigned int size)
{
	sector_t retval = ~((sector_t)0);
	loff_t sz = i_size_read(bdev->bd_inode);

	if (sz) {
		unsigned int sizebits = blksize_bits(size);
		retval = (sz >> sizebits);
	}
	return retval;
}

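/*
 * Initialise the state of a blockdev page's buffers.
 */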
static sector_t
init_page_buffers(struct page *page, struct block_device *bdev,
			sector_t block, int size)
{
	struct buffer_head *head = page_buffers(page);
	struct buffer_head *bh = head;
	int uptodate = PageUptodate(page);
	sector_t end_block = blkdev_max_block(I_BDEV(bdev->bd_inode), size);

	do {
		if (!buffer_mapped(bh)) {
			init_buffer(bh, NULL, NULL);
			bh->b_bdev = bdev;
			bh->b_blocknr = block;
			if (uptodate)
				set_buffer_uptodate(bh);
			if (block < end_block)
				set_buffer_mapped(bh);
		}
		block++;
		bh = bh->b_this_page;
	} while (bh != head);

	return end_block;
}

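/*
 * Create the page-cache page that contains the requested block.
 *
 * This is used purely for blockdev mappings.
 */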
static int
grow_dev_page(struct block_device *bdev, sector_t block,
	      pgoff_t index, int size, int sizebits, gfp_t gfp)
{
	struct inode *inode = bdev->bd_inode;
	struct page *page;
	struct buffer_head *bh;
	sector_t end_block;
	int ret = 0;
	gfp_t gfp_mask;

	gfp_mask = mapping_gfp_constraint(inode->i_mapping, ~__GFP_FS) | gfp;

	gfp_mask |= __GFP_NOFAIL;

	page = find_or_create_page(inode->i_mapping, index, gfp_mask);
	if (!page)
		return ret;

	BUG_ON(!PageLocked(page));

	if (page_has_buffers(page)) {
		bh = page_buffers(page);
		if (bh->b_size == size) {
			end_block = init_page_buffers(page, bdev,
						(sector_t)index << sizebits,
						size);
			goto done;
		}
		if (!try_to_free_buffers(page))
			goto failed;
	}

	bh = alloc_page_buffers(page, size, 0);
	if (!bh)
		goto failed;

	spin_lock(&inode->i_mapping->private_lock);
	link_dev_buffers(page, bh);
	end_block = init_page_buffers(page, bdev, (sector_t)index << sizebits,
			size);
	spin_unlock(&inode->i_mapping->private_lock);
done:
	ret = (block < end_block) ? 1 : -ENXIO;
failed:
	unlock_page(page);
	put_page(page);
	return ret;
}

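/*
 * Create buffers for the specified block device block's page.  If
 * that page was dirty, the buffers are set dirty also.
 */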
static int
grow_buffers(struct block_device *bdev, sector_t block, int size, gfp_t gfp)
{
	pgoff_t index;
	int sizebits;

	sizebits = -1;
	do {
		sizebits++;
	} while ((size << sizebits) < PAGE_SIZE);

	index = block >> sizebits;

	if (unlikely(index != block >> sizebits)) {
		printk(KERN_ERR "%s: requested out-of-range block %llu for "
			"device %pg\n",
			__func__, (unsigned long long)block,
			bdev);
		return -EIO;
	}

	return grow_dev_page(bdev, block, index, size, sizebits, gfp);
}

struct buffer_head *
__getblk_slow(struct block_device *bdev, sector_t block,
	     unsigned size, gfp_t gfp)
{
	if (unlikely(size & (bdev_logical_block_size(bdev)-1) ||
			(size < 512 || size > PAGE_SIZE))) {
		printk(KERN_ERR "getblk(): invalid block size %d requested\n",
					size);
		printk(KERN_ERR "logical block size: %d\n",
					bdev_logical_block_size(bdev));

		dump_stack();
		return NULL;
	}

	for (;;) {
		struct buffer_head *bh;
		int ret;

		bh = __find_get_block(bdev, block, size);
		if (bh)
			return bh;

		ret = grow_buffers(bdev, block, size, gfp);
		if (ret < 0)
			return NULL;
		if (ret == 0)
			free_more_memory();
	}
}
EXPORT_SYMBOL(__getblk_slow);

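/**
 * mark_buffer_dirty - mark a buffer_head as needing writeout
 * @bh: the buffer_head to mark dirty
 *
 * mark_buffer_dirty() will set the dirty bit against the buffer, then set
 * its backing page dirty, then tag the page as dirty in its address_space's
 * radix tree and then attach the address_space's inode to its superblock's
 * dirty inode list.
 *
 * mark_buffer_dirty() is atomic.  It takes bh->b_page->mapping->private_lock,
 * mapping->tree_lock and mapping->host->i_lock.
 */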
void mark_buffer_dirty(struct buffer_head *bh)
{
	WARN_ON_ONCE(!buffer_uptodate(bh));

	trace_block_dirty_buffer(bh);

	if (buffer_dirty(bh)) {
		smp_mb();
		if (buffer_dirty(bh))
			return;
	}

	if (!test_set_buffer_dirty(bh)) {
		struct page *page = bh->b_page;
		struct address_space *mapping = NULL;

		lock_page_memcg(page);
		if (!TestSetPageDirty(page)) {
			mapping = page_mapping(page);
			if (mapping)
				__set_page_dirty(page, mapping, 0);
		}
		unlock_page_memcg(page);
		if (mapping)
			__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
	}
}
EXPORT_SYMBOL(mark_buffer_dirty);

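/*
 * Decrement a buffer_head's reference count.  If all buffers against a page
 * have zero reference count, are clean and unlocked, and if the page is clean
 * and unlocked then try_to_free_buffers() may strip the buffers from the page
 * in preparation for freeing it (sometimes, rarely, buffers are removed from
 * a page but it ends up not being freed, and buffers may later be reattached).
 */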
void __brelse(struct buffer_head * buf)
{
	if (atomic_read(&buf->b_count)) {
		put_bh(buf);
		return;
	}
	WARN(1, KERN_ERR "VFS: brelse: Trying to free free buffer\n");
}
EXPORT_SYMBOL(__brelse);

void __bforget(struct buffer_head *bh)
{
	clear_buffer_dirty(bh);
	if (bh->b_assoc_map) {
		struct address_space *buffer_mapping = bh->b_page->mapping;

		spin_lock(&buffer_mapping->private_lock);
		list_del_init(&bh->b_assoc_buffers);
		bh->b_assoc_map = NULL;
		spin_unlock(&buffer_mapping->private_lock);
	}
	__brelse(bh);
}
EXPORT_SYMBOL(__bforget);

static struct buffer_head *__bread_slow(struct buffer_head *bh)
{
	lock_buffer(bh);
	if (buffer_uptodate(bh)) {
		unlock_buffer(bh);
		return bh;
	} else {
		get_bh(bh);
		bh->b_end_io = end_buffer_read_sync;
		submit_bh(READ, bh);
		wait_on_buffer(bh);
		if (buffer_uptodate(bh))
			return bh;
	}
	brelse(bh);
	return NULL;
}

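/*
 * Per-cpu buffer LRU implementation.  To reduce the cost of __find_get_block().
 * The bhs[] array is sorted - newest buffer is at bhs[0].  Buffers have their
 * refcount elevated by one when they're in an LRU.  A buffer can only appear
 * once in a particular CPU's LRU.  A single buffer can be present in multiple
 * CPU's LRUs at the same time.
 *
 * This is a transparent caching front-end to sb_bread(), sb_getblk() and
 * sb_find_get_block().
 *
 * The LRUs themselves only need locking against invalidate_bh_lrus.  We use
 * a local interrupt disable for that.
 */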
#define BH_LRU_SIZE	16

struct bh_lru {
	struct buffer_head *bhs[BH_LRU_SIZE];
};

static DEFINE_PER_CPU(struct bh_lru, bh_lrus) = {{ NULL }};

#ifdef CONFIG_SMP
#define bh_lru_lock()	local_irq_disable()
#define bh_lru_unlock()	local_irq_enable()
#else
#define bh_lru_lock()	preempt_disable()
#define bh_lru_unlock()	preempt_enable()
#endif

static inline void check_irqs_on(void)
{
#ifdef irqs_disabled
	BUG_ON(irqs_disabled());
#endif
}

static void bh_lru_install(struct buffer_head *bh)
{
	struct buffer_head *evictee = NULL;

	check_irqs_on();
	bh_lru_lock();
	if (__this_cpu_read(bh_lrus.bhs[0]) != bh) {
		struct buffer_head *bhs[BH_LRU_SIZE];
		int in;
		int out = 0;

		get_bh(bh);
		bhs[out++] = bh;
		for (in = 0; in < BH_LRU_SIZE; in++) {
			struct buffer_head *bh2 =
				__this_cpu_read(bh_lrus.bhs[in]);

			if (bh2 == bh) {
				__brelse(bh2);
			} else {
				if (out >= BH_LRU_SIZE) {
					BUG_ON(evictee != NULL);
					evictee = bh2;
				} else {
					bhs[out++] = bh2;
				}
			}
		}
		while (out < BH_LRU_SIZE)
			bhs[out++] = NULL;
		memcpy(this_cpu_ptr(&bh_lrus.bhs), bhs, sizeof(bhs));
	}
	bh_lru_unlock();

	if (evictee)
		__brelse(evictee);
}

static struct buffer_head *
lookup_bh_lru(struct block_device *bdev, sector_t block, unsigned size)
{
	struct buffer_head *ret = NULL;
	unsigned int i;

	check_irqs_on();
	bh_lru_lock();
	for (i = 0; i < BH_LRU_SIZE; i++) {
		struct buffer_head *bh = __this_cpu_read(bh_lrus.bhs[i]);

		if (bh && bh->b_blocknr == block && bh->b_bdev == bdev &&
		    bh->b_size == size) {
			if (i) {
				while (i) {
					__this_cpu_write(bh_lrus.bhs[i],
						__this_cpu_read(bh_lrus.bhs[i - 1]));
					i--;
				}
				__this_cpu_write(bh_lrus.bhs[0], bh);
			}
			get_bh(bh);
			ret = bh;
			break;
		}
	}
	bh_lru_unlock();
	return ret;
}

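/*
 * Perform a pagecache lookup for the matching buffer.  If it's there, refresh
 * it in the LRU and mark it as accessed.  If it is not present then return
 * NULL
 */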
struct buffer_head *
__find_get_block(struct block_device *bdev, sector_t block, unsigned size)
{
	struct buffer_head *bh = lookup_bh_lru(bdev, block, size);

	if (bh == NULL) {
		bh = __find_get_block_slow(bdev, block);
		if (bh)
			bh_lru_install(bh);
	} else
		touch_buffer(bh);

	return bh;
}
EXPORT_SYMBOL(__find_get_block);

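/*
 * __getblk_gfp() will locate (and, if necessary, create) the buffer_head
 * which corresponds to the passed block_device, block and size. The
 * returned buffer has its reference count incremented.
 *
 * __getblk_gfp() will lock up the machine if grow_dev_page's
 * try_to_free_buffers() attempt is failing.  FIXME, perhaps?
 */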
struct buffer_head *
__getblk_gfp(struct block_device *bdev, sector_t block,
	     unsigned size, gfp_t gfp)
{
	struct buffer_head *bh = __find_get_block(bdev, block, size);

	might_sleep();
	if (bh == NULL)
		bh = __getblk_slow(bdev, block, size, gfp);
	return bh;
}
EXPORT_SYMBOL(__getblk_gfp);

void __breadahead(struct block_device *bdev, sector_t block, unsigned size)
{
	struct buffer_head *bh = __getblk(bdev, block, size);
	if (likely(bh)) {
		ll_rw_block(READA, 1, &bh);
		brelse(bh);
	}
}
EXPORT_SYMBOL(__breadahead);

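/**
 *  __bread_gfp() - reads a specified block and returns the bh
 *  @bdev: the block_device to read from
 *  @block: number of block
 *  @size: size (in bytes) to read
 *  @gfp: page allocation flag
 *
 *  Reads a specified block, and returns buffer head that contains it.
 *  The page cache can be allocated from non-movable area
 *  not to prevent page migration if you set gfp to zero.
 *  It returns NULL if the block was unreadable.
 *
 *  A minimal usage sketch (assuming a hypothetical bdev with a
 *  4096-byte block size), via the __bread() wrapper:
 *
 *	struct buffer_head *bh = __bread(bdev, 0, 4096);
 *	if (bh) {
 *		... examine bh->b_data ...
 *		brelse(bh);
 *	}
 */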
struct buffer_head *
__bread_gfp(struct block_device *bdev, sector_t block,
		   unsigned size, gfp_t gfp)
{
	struct buffer_head *bh = __getblk_gfp(bdev, block, size, gfp);

	if (likely(bh) && !buffer_uptodate(bh))
		bh = __bread_slow(bh);
	return bh;
}
EXPORT_SYMBOL(__bread_gfp);

static void invalidate_bh_lru(void *arg)
{
	struct bh_lru *b = &get_cpu_var(bh_lrus);
	int i;

	for (i = 0; i < BH_LRU_SIZE; i++) {
		brelse(b->bhs[i]);
		b->bhs[i] = NULL;
	}
	put_cpu_var(bh_lrus);
}

static bool has_bh_in_lru(int cpu, void *dummy)
{
	struct bh_lru *b = per_cpu_ptr(&bh_lrus, cpu);
	int i;

	for (i = 0; i < BH_LRU_SIZE; i++) {
		if (b->bhs[i])
			return 1;
	}

	return 0;
}

void invalidate_bh_lrus(void)
{
	on_each_cpu_cond(has_bh_in_lru, invalidate_bh_lru, NULL, 1, GFP_KERNEL);
}
EXPORT_SYMBOL_GPL(invalidate_bh_lrus);

void set_bh_page(struct buffer_head *bh,
		struct page *page, unsigned long offset)
{
	bh->b_page = page;
	BUG_ON(offset >= PAGE_SIZE);
	if (PageHighMem(page))
		bh->b_data = (char *)(0 + offset);
	else
		bh->b_data = page_address(page) + offset;
}
EXPORT_SYMBOL(set_bh_page);

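/*
 * Called when truncating a buffer on a page completely.
 */

/* Bits that are cleared during an invalidate */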
#define BUFFER_FLAGS_DISCARD \
	(1 << BH_Mapped | 1 << BH_New | 1 << BH_Req | \
	 1 << BH_Delay | 1 << BH_Unwritten)

static void discard_buffer(struct buffer_head * bh)
{
	unsigned long b_state, b_state_old;

	lock_buffer(bh);
	clear_buffer_dirty(bh);
	bh->b_bdev = NULL;
	b_state = bh->b_state;
	for (;;) {
		b_state_old = cmpxchg(&bh->b_state, b_state,
				      (b_state & ~BUFFER_FLAGS_DISCARD));
		if (b_state_old == b_state)
			break;
		b_state = b_state_old;
	}
	unlock_buffer(bh);
}

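/**
 * block_invalidatepage - invalidate part or all of a buffer-backed page
 *
 * @page: the page which is affected
 * @offset: start of the range to invalidate
 * @length: length of the range to invalidate
 *
 * block_invalidatepage() is called when all or part of the page has become
 * invalidated by a truncate operation.
 *
 * block_invalidatepage() does not have to release all buffers, but it must
 * ensure that no dirty buffer is left outside @offset and that no I/O
 * is underway against any of the blocks which are outside the truncation
 * point.  Because the caller is about to free (and possibly reuse) those
 * blocks on-disk.
 */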
void block_invalidatepage(struct page *page, unsigned int offset,
			  unsigned int length)
{
	struct buffer_head *head, *bh, *next;
	unsigned int curr_off = 0;
	unsigned int stop = length + offset;

	BUG_ON(!PageLocked(page));
	if (!page_has_buffers(page))
		goto out;

	BUG_ON(stop > PAGE_SIZE || stop < length);

	head = page_buffers(page);
	bh = head;
	do {
		unsigned int next_off = curr_off + bh->b_size;
		next = bh->b_this_page;

		if (next_off > stop)
			goto out;

		if (offset <= curr_off)
			discard_buffer(bh);
		curr_off = next_off;
		bh = next;
	} while (bh != head);

	if (offset == 0)
		try_to_release_page(page, 0);
out:
	return;
}
EXPORT_SYMBOL(block_invalidatepage);

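/*
 * We attach and possibly dirty the buffers atomically wrt
 * __set_page_dirty_buffers() via private_lock.  try_to_free_buffers
 * is already excluded via the page lock.
 */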
void create_empty_buffers(struct page *page,
			unsigned long blocksize, unsigned long b_state)
{
	struct buffer_head *bh, *head, *tail;

	head = alloc_page_buffers(page, blocksize, 1);
	bh = head;
	do {
		bh->b_state |= b_state;
		tail = bh;
		bh = bh->b_this_page;
	} while (bh);
	tail->b_this_page = head;

	spin_lock(&page->mapping->private_lock);
	if (PageUptodate(page) || PageDirty(page)) {
		bh = head;
		do {
			if (PageDirty(page))
				set_buffer_dirty(bh);
			if (PageUptodate(page))
				set_buffer_uptodate(bh);
			bh = bh->b_this_page;
		} while (bh != head);
	}
	attach_page_buffers(page, head);
	spin_unlock(&page->mapping->private_lock);
}
EXPORT_SYMBOL(create_empty_buffers);

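/*
 * We are taking a block for data and we don't want any output from any
 * buffer-cache aliases starting from return from that function and
 * until the moment when something will explicitly mark the buffer
 * dirty (hopefully that will not happen until we will free that block ;-)
 * We don't even need to mark it not-uptodate - nobody can expect
 * anything from a newly allocated buffer anyway.  We used to use
 * unmap_buffer() for such invalidation, but that was wrong.  We definitely
 * don't want to mark the alias unmapped, for example - it would confuse
 * anyone who might pick it with bread() afterwards...
 *
 * Also..  Note that bforget() doesn't lock the buffer.  So there can
 * be writeout I/O going on against recently-freed buffers.  We don't
 * wait on that I/O in bforget() - it's more efficient to wait on the I/O
 * only if we really need to.  That happens here.
 */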
void unmap_underlying_metadata(struct block_device *bdev, sector_t block)
{
	struct buffer_head *old_bh;

	might_sleep();

	old_bh = __find_get_block_slow(bdev, block);
	if (old_bh) {
		clear_buffer_dirty(old_bh);
		wait_on_buffer(old_bh);
		clear_buffer_req(old_bh);
		__brelse(old_bh);
	}
}
EXPORT_SYMBOL(unmap_underlying_metadata);

static inline int block_size_bits(unsigned int blocksize)
{
	return ilog2(blocksize);
}

static struct buffer_head *create_page_buffers(struct page *page, struct inode *inode, unsigned int b_state)
{
	BUG_ON(!PageLocked(page));

	if (!page_has_buffers(page))
		create_empty_buffers(page, 1 << ACCESS_ONCE(inode->i_blkbits), b_state);
	return page_buffers(page);
}

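/*
 * NOTE! All mapped/uptodate combinations are valid:
 *
 *	Mapped	Uptodate	Meaning
 *
 *	No	No		"unknown" - must do get_block()
 *	No	Yes		"hole" - zero-filled
 *	Yes	No		"allocated" - allocated on disk, not read in
 *	Yes	Yes		"valid" - allocated and up-to-date in memory.
 *
 * "Dirty" is valid only with the last case (mapped+uptodate).
 *
 * While block_write_full_page is writing back the dirty buffers under
 * the page lock, whoever dirtied the buffers may decide to clean them
 * again at any time.  We handle that by surprise: we fail to write a
 * buffer which has been cleaned underneath us, and simply redirty the
 * page for later writeout.
 */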
static int __block_write_full_page(struct inode *inode, struct page *page,
			get_block_t *get_block, struct writeback_control *wbc,
			bh_end_io_t *handler)
{
	int err;
	sector_t block;
	sector_t last_block;
	struct buffer_head *bh, *head;
	unsigned int blocksize, bbits;
	int nr_underway = 0;
	int write_op = (wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE);

	head = create_page_buffers(page, inode,
					(1 << BH_Dirty)|(1 << BH_Uptodate));

	bh = head;
	blocksize = bh->b_size;
	bbits = block_size_bits(blocksize);

	block = (sector_t)page->index << (PAGE_SHIFT - bbits);
	last_block = (i_size_read(inode) - 1) >> bbits;

	do {
		if (block > last_block) {
			clear_buffer_dirty(bh);
			set_buffer_uptodate(bh);
		} else if ((!buffer_mapped(bh) || buffer_delay(bh)) &&
			   buffer_dirty(bh)) {
			WARN_ON(bh->b_size != blocksize);
			err = get_block(inode, block, bh, 1);
			if (err)
				goto recover;
			clear_buffer_delay(bh);
			if (buffer_new(bh)) {
				clear_buffer_new(bh);
				unmap_underlying_metadata(bh->b_bdev,
							bh->b_blocknr);
			}
		}
		bh = bh->b_this_page;
		block++;
	} while (bh != head);

	do {
		if (!buffer_mapped(bh))
			continue;

		if (wbc->sync_mode != WB_SYNC_NONE) {
			lock_buffer(bh);
		} else if (!trylock_buffer(bh)) {
			redirty_page_for_writepage(wbc, page);
			continue;
		}
		if (test_clear_buffer_dirty(bh)) {
			mark_buffer_async_write_endio(bh, handler);
		} else {
			unlock_buffer(bh);
		}
	} while ((bh = bh->b_this_page) != head);

	BUG_ON(PageWriteback(page));
	set_page_writeback(page);

	do {
		struct buffer_head *next = bh->b_this_page;
		if (buffer_async_write(bh)) {
			submit_bh_wbc(write_op, bh, 0, wbc);
			nr_underway++;
		}
		bh = next;
	} while (bh != head);
	unlock_page(page);

	err = 0;
done:
	if (nr_underway == 0) {
		end_page_writeback(page);
	}
	return err;

recover:
	bh = head;

	do {
		if (buffer_mapped(bh) && buffer_dirty(bh) &&
		    !buffer_delay(bh)) {
			lock_buffer(bh);
			mark_buffer_async_write_endio(bh, handler);
		} else {
			clear_buffer_dirty(bh);
		}
	} while ((bh = bh->b_this_page) != head);
	SetPageError(page);
	BUG_ON(PageWriteback(page));
	mapping_set_error(page->mapping, err);
	set_page_writeback(page);
	do {
		struct buffer_head *next = bh->b_this_page;
		if (buffer_async_write(bh)) {
			clear_buffer_dirty(bh);
			submit_bh_wbc(write_op, bh, 0, wbc);
			nr_underway++;
		}
		bh = next;
	} while (bh != head);
	unlock_page(page);
	goto done;
}

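/*
 * If a page has any new buffers, zero them out here, and mark them uptodate
 * and dirty so they'll be written out (in order to prevent uninitialised
 * block data from leaking). And clear the new bit.
 */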
void page_zero_new_buffers(struct page *page, unsigned from, unsigned to)
{
	unsigned int block_start, block_end;
	struct buffer_head *head, *bh;

	BUG_ON(!PageLocked(page));
	if (!page_has_buffers(page))
		return;

	bh = head = page_buffers(page);
	block_start = 0;
	do {
		block_end = block_start + bh->b_size;

		if (buffer_new(bh)) {
			if (block_end > from && block_start < to) {
				if (!PageUptodate(page)) {
					unsigned start, size;

					start = max(from, block_start);
					size = min(to, block_end) - start;

					zero_user(page, start, size);
					set_buffer_uptodate(bh);
				}

				clear_buffer_new(bh);
				mark_buffer_dirty(bh);
			}
		}

		block_start = block_end;
		bh = bh->b_this_page;
	} while (bh != head);
}
EXPORT_SYMBOL(page_zero_new_buffers);

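/*
 * Prepare the buffers in [pos, pos+len) for a write: map them with
 * get_block() where needed, zero out the parts of any freshly allocated
 * blocks that lie outside the write, and read in any existing blocks
 * that will only be partially overwritten.  Returns 0 on success; on
 * failure the new buffers have already been zeroed via
 * page_zero_new_buffers().
 */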
int __block_write_begin(struct page *page, loff_t pos, unsigned len,
		get_block_t *get_block)
{
	unsigned from = pos & (PAGE_SIZE - 1);
	unsigned to = from + len;
	struct inode *inode = page->mapping->host;
	unsigned block_start, block_end;
	sector_t block;
	int err = 0;
	unsigned blocksize, bbits;
	struct buffer_head *bh, *head, *wait[2], **wait_bh=wait;

	BUG_ON(!PageLocked(page));
	BUG_ON(from > PAGE_SIZE);
	BUG_ON(to > PAGE_SIZE);
	BUG_ON(from > to);

	head = create_page_buffers(page, inode, 0);
	blocksize = head->b_size;
	bbits = block_size_bits(blocksize);

	block = (sector_t)page->index << (PAGE_SHIFT - bbits);

	for(bh = head, block_start = 0; bh != head || !block_start;
	    block++, block_start=block_end, bh = bh->b_this_page) {
		block_end = block_start + blocksize;
		if (block_end <= from || block_start >= to) {
			if (PageUptodate(page)) {
				if (!buffer_uptodate(bh))
					set_buffer_uptodate(bh);
			}
			continue;
		}
		if (buffer_new(bh))
			clear_buffer_new(bh);
		if (!buffer_mapped(bh)) {
			WARN_ON(bh->b_size != blocksize);
			err = get_block(inode, block, bh, 1);
			if (err)
				break;
			if (buffer_new(bh)) {
				unmap_underlying_metadata(bh->b_bdev,
							bh->b_blocknr);
				if (PageUptodate(page)) {
					clear_buffer_new(bh);
					set_buffer_uptodate(bh);
					mark_buffer_dirty(bh);
					continue;
				}
				if (block_end > to || block_start < from)
					zero_user_segments(page,
						to, block_end,
						block_start, from);
				continue;
			}
		}
		if (PageUptodate(page)) {
			if (!buffer_uptodate(bh))
				set_buffer_uptodate(bh);
			continue;
		}
		if (!buffer_uptodate(bh) && !buffer_delay(bh) &&
		    !buffer_unwritten(bh) &&
		     (block_start < from || block_end > to)) {
			ll_rw_block(READ, 1, &bh);
			*wait_bh++=bh;
		}
	}

	while(wait_bh > wait) {
		wait_on_buffer(*--wait_bh);
		if (!buffer_uptodate(*wait_bh))
			err = -EIO;
	}
	if (unlikely(err))
		page_zero_new_buffers(page, from, to);
	return err;
}
EXPORT_SYMBOL(__block_write_begin);

static int __block_commit_write(struct inode *inode, struct page *page,
		unsigned from, unsigned to)
{
	unsigned block_start, block_end;
	int partial = 0;
	unsigned blocksize;
	struct buffer_head *bh, *head;

	bh = head = page_buffers(page);
	blocksize = bh->b_size;

	block_start = 0;
	do {
		block_end = block_start + blocksize;
		if (block_end <= from || block_start >= to) {
			if (!buffer_uptodate(bh))
				partial = 1;
		} else {
			set_buffer_uptodate(bh);
			mark_buffer_dirty(bh);
		}
		clear_buffer_new(bh);

		block_start = block_end;
		bh = bh->b_this_page;
	} while (bh != head);

	if (!partial)
		SetPageUptodate(page);
	return 0;
}

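/*
 * block_write_begin takes care of the basic task of block allocation and
 * bringing partial write blocks uptodate first.
 *
 * The filesystem needs to handle block truncation upon failure.
 *
 * Typical use from a filesystem's ->write_begin (a sketch, assuming a
 * foo_get_block() provided by that filesystem):
 *
 *	static int foo_write_begin(struct file *file,
 *			struct address_space *mapping, loff_t pos,
 *			unsigned len, unsigned flags,
 *			struct page **pagep, void **fsdata)
 *	{
 *		return block_write_begin(mapping, pos, len, flags,
 *					 pagep, foo_get_block);
 *	}
 */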
int block_write_begin(struct address_space *mapping, loff_t pos, unsigned len,
		unsigned flags, struct page **pagep, get_block_t *get_block)
{
	pgoff_t index = pos >> PAGE_SHIFT;
	struct page *page;
	int status;

	page = grab_cache_page_write_begin(mapping, index, flags);
	if (!page)
		return -ENOMEM;

	status = __block_write_begin(page, pos, len, get_block);
	if (unlikely(status)) {
		unlock_page(page);
		put_page(page);
		page = NULL;
	}

	*pagep = page;
	return status;
}
EXPORT_SYMBOL(block_write_begin);

int block_write_end(struct file *file, struct address_space *mapping,
			loff_t pos, unsigned len, unsigned copied,
			struct page *page, void *fsdata)
{
	struct inode *inode = mapping->host;
	unsigned start;

	start = pos & (PAGE_SIZE - 1);

	if (unlikely(copied < len)) {
		if (!PageUptodate(page))
			copied = 0;

		page_zero_new_buffers(page, start+copied, start+len);
	}
	flush_dcache_page(page);

	__block_commit_write(inode, page, start, start+copied);

	return copied;
}
EXPORT_SYMBOL(block_write_end);

int generic_write_end(struct file *file, struct address_space *mapping,
			loff_t pos, unsigned len, unsigned copied,
			struct page *page, void *fsdata)
{
	struct inode *inode = mapping->host;
	loff_t old_size = inode->i_size;
	int i_size_changed = 0;

	copied = block_write_end(file, mapping, pos, len, copied, page, fsdata);

	if (pos+copied > inode->i_size) {
		i_size_write(inode, pos+copied);
		i_size_changed = 1;
	}

	unlock_page(page);
	put_page(page);

	if (old_size < pos)
		pagecache_isize_extended(inode, old_size, pos);

	if (i_size_changed)
		mark_inode_dirty(inode);

	return copied;
}
EXPORT_SYMBOL(generic_write_end);

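/*
 * block_is_partially_uptodate checks whether buffers within a page are
 * uptodate or not.
 *
 * Returns true if all buffers which correspond to a file portion
 * we want to read are uptodate.
 */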
int block_is_partially_uptodate(struct page *page, unsigned long from,
					unsigned long count)
{
	unsigned block_start, block_end, blocksize;
	unsigned to;
	struct buffer_head *bh, *head;
	int ret = 1;

	if (!page_has_buffers(page))
		return 0;

	head = page_buffers(page);
	blocksize = head->b_size;
	to = min_t(unsigned, PAGE_SIZE - from, count);
	to = from + to;
	if (from < blocksize && to > PAGE_SIZE - blocksize)
		return 0;

	bh = head;
	block_start = 0;
	do {
		block_end = block_start + blocksize;
		if (block_end > from && block_start < to) {
			if (!buffer_uptodate(bh)) {
				ret = 0;
				break;
			}
			if (block_end >= to)
				break;
		}
		block_start = block_end;
		bh = bh->b_this_page;
	} while (bh != head);

	return ret;
}
EXPORT_SYMBOL(block_is_partially_uptodate);

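/*
 * Generic "read page" function for block devices that have the normal
 * get_block functionality. This is most of the block device filesystems.
 * Reads the page asynchronously --- the unlock_buffer() and
 * set/clear_buffer_uptodate() functions propagate buffer state into the
 * page struct once IO has completed.
 */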
int block_read_full_page(struct page *page, get_block_t *get_block)
{
	struct inode *inode = page->mapping->host;
	sector_t iblock, lblock;
	struct buffer_head *bh, *head, *arr[MAX_BUF_PER_PAGE];
	unsigned int blocksize, bbits;
	int nr, i;
	int fully_mapped = 1;

	head = create_page_buffers(page, inode, 0);
	blocksize = head->b_size;
	bbits = block_size_bits(blocksize);

	iblock = (sector_t)page->index << (PAGE_SHIFT - bbits);
	lblock = (i_size_read(inode)+blocksize-1) >> bbits;
	bh = head;
	nr = 0;
	i = 0;

	do {
		if (buffer_uptodate(bh))
			continue;

		if (!buffer_mapped(bh)) {
			int err = 0;

			fully_mapped = 0;
			if (iblock < lblock) {
				WARN_ON(bh->b_size != blocksize);
				err = get_block(inode, iblock, bh, 0);
				if (err)
					SetPageError(page);
			}
			if (!buffer_mapped(bh)) {
				zero_user(page, i * blocksize, blocksize);
				if (!err)
					set_buffer_uptodate(bh);
				continue;
			}

			if (buffer_uptodate(bh))
				continue;
		}
		arr[nr++] = bh;
	} while (i++, iblock++, (bh = bh->b_this_page) != head);

	if (fully_mapped)
		SetPageMappedToDisk(page);

	if (!nr) {
		if (!PageError(page))
			SetPageUptodate(page);
		unlock_page(page);
		return 0;
	}

	for (i = 0; i < nr; i++) {
		bh = arr[i];
		lock_buffer(bh);
		mark_buffer_async_read(bh);
	}

	for (i = 0; i < nr; i++) {
		bh = arr[i];
		if (buffer_uptodate(bh))
			end_buffer_async_read(bh, 1);
		else
			submit_bh(READ, bh);
	}
	return 0;
}
EXPORT_SYMBOL(block_read_full_page);

int generic_cont_expand_simple(struct inode *inode, loff_t size)
{
	struct address_space *mapping = inode->i_mapping;
	struct page *page;
	void *fsdata;
	int err;

	err = inode_newsize_ok(inode, size);
	if (err)
		goto out;

	err = pagecache_write_begin(NULL, mapping, size, 0,
				AOP_FLAG_UNINTERRUPTIBLE|AOP_FLAG_CONT_EXPAND,
				&page, &fsdata);
	if (err)
		goto out;

	err = pagecache_write_end(NULL, mapping, size, 0, 0, page, fsdata);
	BUG_ON(err > 0);

out:
	return err;
}
EXPORT_SYMBOL(generic_cont_expand_simple);

static int cont_expand_zero(struct file *file, struct address_space *mapping,
			    loff_t pos, loff_t *bytes)
{
	struct inode *inode = mapping->host;
	unsigned blocksize = 1 << inode->i_blkbits;
	struct page *page;
	void *fsdata;
	pgoff_t index, curidx;
	loff_t curpos;
	unsigned zerofrom, offset, len;
	int err = 0;

	index = pos >> PAGE_SHIFT;
	offset = pos & ~PAGE_MASK;

	while (index > (curidx = (curpos = *bytes)>>PAGE_SHIFT)) {
		zerofrom = curpos & ~PAGE_MASK;
		if (zerofrom & (blocksize-1)) {
			*bytes |= (blocksize-1);
			(*bytes)++;
		}
		len = PAGE_SIZE - zerofrom;

		err = pagecache_write_begin(file, mapping, curpos, len,
						AOP_FLAG_UNINTERRUPTIBLE,
						&page, &fsdata);
		if (err)
			goto out;
		zero_user(page, zerofrom, len);
		err = pagecache_write_end(file, mapping, curpos, len, len,
						page, fsdata);
		if (err < 0)
			goto out;
		BUG_ON(err != len);
		err = 0;

		balance_dirty_pages_ratelimited(mapping);

		if (unlikely(fatal_signal_pending(current))) {
			err = -EINTR;
			goto out;
		}
	}

	if (index == curidx) {
		zerofrom = curpos & ~PAGE_MASK;

		if (offset <= zerofrom) {
			goto out;
		}
		if (zerofrom & (blocksize-1)) {
			*bytes |= (blocksize-1);
			(*bytes)++;
		}
		len = offset - zerofrom;

		err = pagecache_write_begin(file, mapping, curpos, len,
						AOP_FLAG_UNINTERRUPTIBLE,
						&page, &fsdata);
		if (err)
			goto out;
		zero_user(page, zerofrom, len);
		err = pagecache_write_end(file, mapping, curpos, len, len,
						page, fsdata);
		if (err < 0)
			goto out;
		BUG_ON(err != len);
		err = 0;
	}
out:
	return err;
}

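/*
 * For moronic filesystems that do not allow holes in file.
 * We may have to extend the file.
 */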
int cont_write_begin(struct file *file, struct address_space *mapping,
			loff_t pos, unsigned len, unsigned flags,
			struct page **pagep, void **fsdata,
			get_block_t *get_block, loff_t *bytes)
{
	struct inode *inode = mapping->host;
	unsigned blocksize = 1 << inode->i_blkbits;
	unsigned zerofrom;
	int err;

	err = cont_expand_zero(file, mapping, pos, bytes);
	if (err)
		return err;

	zerofrom = *bytes & ~PAGE_MASK;
	if (pos+len > *bytes && zerofrom & (blocksize-1)) {
		*bytes |= (blocksize-1);
		(*bytes)++;
	}

	return block_write_begin(mapping, pos, len, flags, pagep, get_block);
}
EXPORT_SYMBOL(cont_write_begin);

int block_commit_write(struct page *page, unsigned from, unsigned to)
{
	struct inode *inode = page->mapping->host;
	__block_commit_write(inode,page,from,to);
	return 0;
}
EXPORT_SYMBOL(block_commit_write);

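/*
 * block_page_mkwrite() is not allowed to change the file size as it gets
 * called from a page fault handler when a page is first dirtied. Hence we must
 * be careful to check for EOF conditions here. We set the page up correctly
 * for a written page which means we get ENOSPC checking when writing into
 * holes and correct delalloc and unwritten extent mapping on filesystems that
 * support these features.
 *
 * We are not allowed to take the i_mutex here so we have to play games to
 * protect against truncate races as the page could now be beyond EOF.  Because
 * truncate writes the inode size before removing pages, once we have the
 * page lock we can determine safely if the page is beyond EOF. If it is not
 * beyond EOF, then the page is guaranteed safe against truncation until we
 * unlock the page.
 *
 * Direct callers of this function should protect against filesystem freezing
 * using sb_start_pagefault() - sb_end_pagefault() functions.
 */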
int block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
			 get_block_t get_block)
{
	struct page *page = vmf->page;
	struct inode *inode = file_inode(vma->vm_file);
	unsigned long end;
	loff_t size;
	int ret;

	lock_page(page);
	size = i_size_read(inode);
	if ((page->mapping != inode->i_mapping) ||
	    (page_offset(page) > size)) {
		ret = -EFAULT;
		goto out_unlock;
	}

	if (((page->index + 1) << PAGE_SHIFT) > size)
		end = size & ~PAGE_MASK;
	else
		end = PAGE_SIZE;

	ret = __block_write_begin(page, 0, end, get_block);
	if (!ret)
		ret = block_commit_write(page, 0, end);

	if (unlikely(ret < 0))
		goto out_unlock;
	set_page_dirty(page);
	wait_for_stable_page(page);
	return 0;
out_unlock:
	unlock_page(page);
	return ret;
}
EXPORT_SYMBOL(block_page_mkwrite);

static void end_buffer_read_nobh(struct buffer_head *bh, int uptodate)
{
	__end_buffer_read_notouch(bh, uptodate);
}

static void attach_nobh_buffers(struct page *page, struct buffer_head *head)
{
	struct buffer_head *bh;

	BUG_ON(!PageLocked(page));

	spin_lock(&page->mapping->private_lock);
	bh = head;
	do {
		if (PageDirty(page))
			set_buffer_dirty(bh);
		if (!bh->b_this_page)
			bh->b_this_page = head;
		bh = bh->b_this_page;
	} while (bh != head);
	attach_page_buffers(page, head);
	spin_unlock(&page->mapping->private_lock);
}

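/*
 * On entry, the page is fully not uptodate.
 * On exit the page is fully uptodate in the areas outside (from,to)
 * The filesystem needs to handle block truncation upon failure.
 */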
int nobh_write_begin(struct address_space *mapping,
			loff_t pos, unsigned len, unsigned flags,
			struct page **pagep, void **fsdata,
			get_block_t *get_block)
{
	struct inode *inode = mapping->host;
	const unsigned blkbits = inode->i_blkbits;
	const unsigned blocksize = 1 << blkbits;
	struct buffer_head *head, *bh;
	struct page *page;
	pgoff_t index;
	unsigned from, to;
	unsigned block_in_page;
	unsigned block_start, block_end;
	sector_t block_in_file;
	int nr_reads = 0;
	int ret = 0;
	int is_mapped_to_disk = 1;

	index = pos >> PAGE_SHIFT;
	from = pos & (PAGE_SIZE - 1);
	to = from + len;

	page = grab_cache_page_write_begin(mapping, index, flags);
	if (!page)
		return -ENOMEM;
	*pagep = page;
	*fsdata = NULL;

	if (page_has_buffers(page)) {
		ret = __block_write_begin(page, pos, len, get_block);
		if (unlikely(ret))
			goto out_release;
		return ret;
	}

	if (PageMappedToDisk(page))
		return 0;

	head = alloc_page_buffers(page, blocksize, 0);
	if (!head) {
		ret = -ENOMEM;
		goto out_release;
	}

	block_in_file = (sector_t)page->index << (PAGE_SHIFT - blkbits);

	for (block_start = 0, block_in_page = 0, bh = head;
	     block_start < PAGE_SIZE;
	     block_in_page++, block_start += blocksize, bh = bh->b_this_page) {
		int create;

		block_end = block_start + blocksize;
		bh->b_state = 0;
		create = 1;
		if (block_start >= to)
			create = 0;
		ret = get_block(inode, block_in_file + block_in_page,
					bh, create);
		if (ret)
			goto failed;
		if (!buffer_mapped(bh))
			is_mapped_to_disk = 0;
		if (buffer_new(bh))
			unmap_underlying_metadata(bh->b_bdev, bh->b_blocknr);
		if (PageUptodate(page)) {
			set_buffer_uptodate(bh);
			continue;
		}
		if (buffer_new(bh) || !buffer_mapped(bh)) {
			zero_user_segments(page, block_start, from,
							to, block_end);
			continue;
		}
		if (buffer_uptodate(bh))
			continue;
		if (block_start < from || block_end > to) {
			lock_buffer(bh);
			bh->b_end_io = end_buffer_read_nobh;
			submit_bh(READ, bh);
			nr_reads++;
		}
	}

	if (nr_reads) {
		for (bh = head; bh; bh = bh->b_this_page) {
			wait_on_buffer(bh);
			if (!buffer_uptodate(bh))
				ret = -EIO;
		}
		if (ret)
			goto failed;
	}

	if (is_mapped_to_disk)
		SetPageMappedToDisk(page);

	*fsdata = head;

	return 0;

failed:
	BUG_ON(!ret);

	attach_nobh_buffers(page, head);
	page_zero_new_buffers(page, from, to);

out_release:
	unlock_page(page);
	put_page(page);
	*pagep = NULL;

	return ret;
}
EXPORT_SYMBOL(nobh_write_begin);

int nobh_write_end(struct file *file, struct address_space *mapping,
			loff_t pos, unsigned len, unsigned copied,
			struct page *page, void *fsdata)
{
	struct inode *inode = page->mapping->host;
	struct buffer_head *head = fsdata;
	struct buffer_head *bh;
	BUG_ON(fsdata != NULL && page_has_buffers(page));

	if (unlikely(copied < len) && head)
		attach_nobh_buffers(page, head);
	if (page_has_buffers(page))
		return generic_write_end(file, mapping, pos, len,
					copied, page, fsdata);

	SetPageUptodate(page);
	set_page_dirty(page);
	if (pos+copied > inode->i_size) {
		i_size_write(inode, pos+copied);
		mark_inode_dirty(inode);
	}

	unlock_page(page);
	put_page(page);

	while (head) {
		bh = head;
		head = head->b_this_page;
		free_buffer_head(bh);
	}

	return copied;
}
EXPORT_SYMBOL(nobh_write_end);

int nobh_writepage(struct page *page, get_block_t *get_block,
			struct writeback_control *wbc)
{
	struct inode * const inode = page->mapping->host;
	loff_t i_size = i_size_read(inode);
	const pgoff_t end_index = i_size >> PAGE_SHIFT;
	unsigned offset;
	int ret;

	if (page->index < end_index)
		goto out;

	offset = i_size & (PAGE_SIZE-1);
	if (page->index >= end_index+1 || !offset) {
#if 0
		if (page->mapping->a_ops->invalidatepage)
			page->mapping->a_ops->invalidatepage(page, offset);
#endif
		unlock_page(page);
		return 0;
	}

	zero_user_segment(page, offset, PAGE_SIZE);
out:
	ret = mpage_writepage(page, get_block, wbc);
	if (ret == -EAGAIN)
		ret = __block_write_full_page(inode, page, get_block, wbc,
					      end_buffer_async_write);
	return ret;
}
EXPORT_SYMBOL(nobh_writepage);

int nobh_truncate_page(struct address_space *mapping,
			loff_t from, get_block_t *get_block)
{
	pgoff_t index = from >> PAGE_SHIFT;
	unsigned offset = from & (PAGE_SIZE-1);
	unsigned blocksize;
	sector_t iblock;
	unsigned length, pos;
	struct inode *inode = mapping->host;
	struct page *page;
	struct buffer_head map_bh;
	int err;

	blocksize = 1 << inode->i_blkbits;
	length = offset & (blocksize - 1);

	if (!length)
		return 0;

	length = blocksize - length;
	iblock = (sector_t)index << (PAGE_SHIFT - inode->i_blkbits);

	page = grab_cache_page(mapping, index);
	err = -ENOMEM;
	if (!page)
		goto out;

	if (page_has_buffers(page)) {
has_buffers:
		unlock_page(page);
		put_page(page);
		return block_truncate_page(mapping, from, get_block);
	}

	pos = blocksize;
	while (offset >= pos) {
		iblock++;
		pos += blocksize;
	}

	map_bh.b_size = blocksize;
	map_bh.b_state = 0;
	err = get_block(inode, iblock, &map_bh, 0);
	if (err)
		goto unlock;

	if (!buffer_mapped(&map_bh))
		goto unlock;

	if (!PageUptodate(page)) {
		err = mapping->a_ops->readpage(NULL, page);
		if (err) {
			put_page(page);
			goto out;
		}
		lock_page(page);
		if (!PageUptodate(page)) {
			err = -EIO;
			goto unlock;
		}
		if (page_has_buffers(page))
			goto has_buffers;
	}
	zero_user(page, offset, length);
	set_page_dirty(page);
	err = 0;

unlock:
	unlock_page(page);
	put_page(page);
out:
	return err;
}
EXPORT_SYMBOL(nobh_truncate_page);

int block_truncate_page(struct address_space *mapping,
			loff_t from, get_block_t *get_block)
{
	pgoff_t index = from >> PAGE_SHIFT;
	unsigned offset = from & (PAGE_SIZE-1);
	unsigned blocksize;
	sector_t iblock;
	unsigned length, pos;
	struct inode *inode = mapping->host;
	struct page *page;
	struct buffer_head *bh;
	int err;

	blocksize = 1 << inode->i_blkbits;
	length = offset & (blocksize - 1);

	if (!length)
		return 0;

	length = blocksize - length;
	iblock = (sector_t)index << (PAGE_SHIFT - inode->i_blkbits);

	page = grab_cache_page(mapping, index);
	err = -ENOMEM;
	if (!page)
		goto out;

	if (!page_has_buffers(page))
		create_empty_buffers(page, blocksize, 0);

	bh = page_buffers(page);
	pos = blocksize;
	while (offset >= pos) {
		bh = bh->b_this_page;
		iblock++;
		pos += blocksize;
	}

	err = 0;
	if (!buffer_mapped(bh)) {
		WARN_ON(bh->b_size != blocksize);
		err = get_block(inode, iblock, bh, 0);
		if (err)
			goto unlock;

		if (!buffer_mapped(bh))
			goto unlock;
	}

	if (PageUptodate(page))
		set_buffer_uptodate(bh);

	if (!buffer_uptodate(bh) && !buffer_delay(bh) && !buffer_unwritten(bh)) {
		err = -EIO;
		ll_rw_block(READ, 1, &bh);
		wait_on_buffer(bh);

		if (!buffer_uptodate(bh))
			goto unlock;
	}

	zero_user(page, offset, length);
	mark_buffer_dirty(bh);
	err = 0;

unlock:
	unlock_page(page);
	put_page(page);
out:
	return err;
}
EXPORT_SYMBOL(block_truncate_page);

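/*
 * The generic ->writepage function for buffer-backed address_spaces
 */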
2877int block_write_full_page(struct page *page, get_block_t *get_block,
2878 struct writeback_control *wbc)
2879{
2880 struct inode * const inode = page->mapping->host;
2881 loff_t i_size = i_size_read(inode);
2882 const pgoff_t end_index = i_size >> PAGE_SHIFT;
2883 unsigned offset;
2884
2885
2886 if (page->index < end_index)
2887 return __block_write_full_page(inode, page, get_block, wbc,
2888 end_buffer_async_write);
2889
2890
2891 offset = i_size & (PAGE_SIZE-1);
2892 if (page->index >= end_index+1 || !offset) {
2893
2894
2895
2896
2897
2898 do_invalidatepage(page, 0, PAGE_SIZE);
2899 unlock_page(page);
2900 return 0;
2901 }
2902
2903
2904
2905
2906
2907
2908
2909
2910 zero_user_segment(page, offset, PAGE_SIZE);
2911 return __block_write_full_page(inode, page, get_block, wbc,
2912 end_buffer_async_write);
2913}
2914EXPORT_SYMBOL(block_write_full_page);

/*
 * Map a file-relative block to its on-disk block number for bmap()/FIBMAP.
 * Holes (and blocks the callback fails to map) come back as block 0.
 */
sector_t generic_block_bmap(struct address_space *mapping, sector_t block,
			    get_block_t *get_block)
{
	struct buffer_head tmp;
	struct inode *inode = mapping->host;
	tmp.b_state = 0;
	tmp.b_blocknr = 0;
	tmp.b_size = 1 << inode->i_blkbits;
	get_block(inode, block, &tmp, 0);
	return tmp.b_blocknr;
}
EXPORT_SYMBOL(generic_block_bmap);
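
/*
 * Illustrative sketch (not part of the original source): ->bmap wiring for
 * FIBMAP support, again assuming a hypothetical foo_get_block().
 */
#if 0
static sector_t foo_bmap(struct address_space *mapping, sector_t block)
{
	return generic_block_bmap(mapping, block, foo_get_block);
}
#endif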

/*
 * Completion handler: hand the bio's status back to the buffer_head's
 * b_end_io and release the bio.
 */
static void end_bio_bh_io_sync(struct bio *bio)
{
	struct buffer_head *bh = bio->bi_private;

	if (unlikely(bio_flagged(bio, BIO_QUIET)))
		set_bit(BH_Quiet, &bh->b_state);

	bh->b_end_io(bh, !bio->bi_error);
	bio_put(bio);
}

/*
 * This allows us to do IO even on the odd last sectors
 * of a device, even if the block size is some multiple
 * of the physical sector size.
 *
 * We'll just truncate the bio to the size of the device,
 * and clear the end of the buffer head manually.
 *
 * Truly out-of-range accesses will turn into actual IO
 * errors, this only handles the "we need to be able to
 * do IO at the final sector" case.
 */
void guard_bio_eod(int rw, struct bio *bio)
{
	sector_t maxsector;
	struct bio_vec *bvec = &bio->bi_io_vec[bio->bi_vcnt - 1];
	unsigned truncated_bytes;

	maxsector = i_size_read(bio->bi_bdev->bd_inode) >> 9;
	if (!maxsector)
		return;

	/*
	 * If the *whole* IO is past the end of the device,
	 * let it through, and the IO layer will turn it into
	 * an EIO.
	 */
	if (unlikely(bio->bi_iter.bi_sector >= maxsector))
		return;

	maxsector -= bio->bi_iter.bi_sector;
	if (likely((bio->bi_iter.bi_size >> 9) <= maxsector))
		return;

	/* Uhhuh. We've got a bio that straddles the device size! */
	truncated_bytes = bio->bi_iter.bi_size - (maxsector << 9);

	/* Truncate the bio.. */
	bio->bi_iter.bi_size -= truncated_bytes;
	bvec->bv_len -= truncated_bytes;

	/* ..and clear the end of the buffer head, for read requests */
	if ((rw & RW_MASK) == READ) {
		zero_user(bvec->bv_page, bvec->bv_offset + bvec->bv_len,
				truncated_bytes);
	}
}

static int submit_bh_wbc(int rw, struct buffer_head *bh,
			 unsigned long bio_flags, struct writeback_control *wbc)
{
	struct bio *bio;

	BUG_ON(!buffer_locked(bh));
	BUG_ON(!buffer_mapped(bh));
	BUG_ON(!bh->b_end_io);
	BUG_ON(buffer_delay(bh));
	BUG_ON(buffer_unwritten(bh));

	/*
	 * Only clear out a write error when rewriting
	 */
	if (test_set_buffer_req(bh) && (rw & WRITE))
		clear_buffer_write_io_error(bh);

	/*
	 * from here on down, it's all bio -- do the initial mapping,
	 * submit_bio -> generic_make_request may further map this bio around
	 */
	bio = bio_alloc(GFP_NOIO, 1);

	if (wbc) {
		wbc_init_bio(wbc, bio);
		wbc_account_io(wbc, bh->b_page, bh->b_size);
	}

	bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9);
	bio->bi_bdev = bh->b_bdev;

	bio_add_page(bio, bh->b_page, bh->b_size, bh_offset(bh));
	BUG_ON(bio->bi_iter.bi_size != bh->b_size);

	bio->bi_end_io = end_bio_bh_io_sync;
	bio->bi_private = bh;
	bio->bi_flags |= bio_flags;

	/* Take care of bh's that straddle the end of the device */
	guard_bio_eod(rw, bio);

	if (buffer_meta(bh))
		rw |= REQ_META;
	if (buffer_prio(bh))
		rw |= REQ_PRIO;

	submit_bio(rw, bio);
	return 0;
}

int _submit_bh(int rw, struct buffer_head *bh, unsigned long bio_flags)
{
	return submit_bh_wbc(rw, bh, bio_flags, NULL);
}
EXPORT_SYMBOL_GPL(_submit_bh);

int submit_bh(int rw, struct buffer_head *bh)
{
	return submit_bh_wbc(rw, bh, 0, NULL);
}
EXPORT_SYMBOL(submit_bh);
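
/*
 * Illustrative sketch (not part of the original source): the classic
 * pattern for reading one buffer synchronously via submit_bh().  A real
 * caller would usually prefer bh_submit_read() further down this file.
 */
#if 0
static int foo_read_bh_sync(struct buffer_head *bh)
{
	lock_buffer(bh);
	if (buffer_uptodate(bh)) {
		unlock_buffer(bh);
		return 0;
	}
	get_bh(bh);			/* reference dropped by end_io */
	bh->b_end_io = end_buffer_read_sync;
	submit_bh(READ, bh);
	wait_on_buffer(bh);
	return buffer_uptodate(bh) ? 0 : -EIO;
}
#endif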

/**
 * ll_rw_block: low-level access to block devices (DEPRECATED)
 * @rw: whether to %READ or %WRITE or maybe %READA (readahead)
 * @nr: number of &struct buffer_heads in the array
 * @bhs: array of pointers to &struct buffer_head
 *
 * ll_rw_block() takes an array of pointers to &struct buffer_heads, and
 * requests an I/O operation on them, either a %READ or a %WRITE.  The third
 * %READA option is described in the documentation for generic_make_request()
 * which ll_rw_block() calls.
 *
 * This function drops any buffer that it cannot get a lock on (with the
 * BH_Lock state bit), any buffer that appears to be clean when doing a write
 * request, and any buffer that appears to be up-to-date when doing read
 * request.  Those buffers are marked for I/O only if they were unlocked.
 *
 * ll_rw_block sets b_end_io to simple completion handler that marks
 * the buffer up-to-date (if appropriate), unlocks the buffer and wakes
 * any waiters.
 *
 * All of the buffers must be for the same device, and must also be a
 * multiple of the current approved size for the device.
 */
void ll_rw_block(int rw, int nr, struct buffer_head *bhs[])
{
	int i;

	for (i = 0; i < nr; i++) {
		struct buffer_head *bh = bhs[i];

		if (!trylock_buffer(bh))
			continue;
		if (rw == WRITE) {
			if (test_clear_buffer_dirty(bh)) {
				bh->b_end_io = end_buffer_write_sync;
				get_bh(bh);
				submit_bh(WRITE, bh);
				continue;
			}
		} else {
			if (!buffer_uptodate(bh)) {
				bh->b_end_io = end_buffer_read_sync;
				get_bh(bh);
				submit_bh(rw, bh);
				continue;
			}
		}
		unlock_buffer(bh);
	}
}
EXPORT_SYMBOL(ll_rw_block);
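
/*
 * Illustrative sketch (not part of the original source): using
 * ll_rw_block() to batch readahead of adjacent metadata blocks while only
 * waiting for the block that is actually needed.  foo_read_with_readahead()
 * is a hypothetical helper; error handling (e.g. NULL from sb_getblk) is
 * intentionally minimal.
 */
#if 0
static struct buffer_head *foo_read_with_readahead(struct super_block *sb,
						   sector_t block)
{
	struct buffer_head *bhs[4];
	int i;

	for (i = 0; i < 4; i++)
		bhs[i] = sb_getblk(sb, block + i);
	ll_rw_block(READ, 4, bhs);	/* skips locked/uptodate buffers */

	/* Wait only for the first buffer; the rest complete in background. */
	wait_on_buffer(bhs[0]);
	for (i = 1; i < 4; i++)
		brelse(bhs[i]);
	if (!buffer_uptodate(bhs[0])) {
		brelse(bhs[0]);
		return NULL;
	}
	return bhs[0];
}
#endif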

/*
 * Kick off an asynchronous write of a dirty buffer.  If the buffer turns
 * out to be clean once locked, it is simply unlocked again; nothing waits
 * for the I/O to complete.
 */
void write_dirty_buffer(struct buffer_head *bh, int rw)
{
	lock_buffer(bh);
	if (!test_clear_buffer_dirty(bh)) {
		unlock_buffer(bh);
		return;
	}
	bh->b_end_io = end_buffer_write_sync;
	get_bh(bh);
	submit_bh(rw, bh);
}
EXPORT_SYMBOL(write_dirty_buffer);

/*
 * For a data-integrity writeout, we need to wait upon any in-progress I/O
 * and then start new I/O and then wait upon it.  The caller must have a ref
 * on the buffer_head.
 */
int __sync_dirty_buffer(struct buffer_head *bh, int rw)
{
	int ret = 0;

	WARN_ON(atomic_read(&bh->b_count) < 1);
	lock_buffer(bh);
	if (test_clear_buffer_dirty(bh)) {
		get_bh(bh);
		bh->b_end_io = end_buffer_write_sync;
		ret = submit_bh(rw, bh);
		wait_on_buffer(bh);
		if (!ret && !buffer_uptodate(bh))
			ret = -EIO;
	} else {
		unlock_buffer(bh);
	}
	return ret;
}
EXPORT_SYMBOL(__sync_dirty_buffer);

int sync_dirty_buffer(struct buffer_head *bh)
{
	return __sync_dirty_buffer(bh, WRITE_SYNC);
}
EXPORT_SYMBOL(sync_dirty_buffer);
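
/*
 * Illustrative sketch (not part of the original source): a filesystem
 * pushing its superblock buffer out with data integrity from a
 * hypothetical foo_sync_super().
 */
#if 0
static int foo_sync_super(struct buffer_head *sbh)
{
	mark_buffer_dirty(sbh);
	return sync_dirty_buffer(sbh);	/* submits WRITE_SYNC and waits */
}
#endif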

/*
 * try_to_free_buffers() checks if all the buffers on this particular page
 * are unused, and releases them if so.
 *
 * Exclusion against try_to_free_buffers may be obtained by either
 * locking the page or by holding its mapping's private_lock.
 *
 * If the page is dirty but all the buffers are clean then we need to
 * be sure to mark the page clean as well.  This is because the page
 * may be against a block device, and a later reattachment of buffers
 * to a dirty page will set *all* buffers dirty.  Which would corrupt
 * filesystem data on the same device.
 *
 * The same applies to regular filesystem pages: if all the buffers are
 * clean then we set the page clean and proceed.  To do that, we require
 * total exclusion from __set_page_dirty_buffers().  That is obtained with
 * private_lock.
 *
 * try_to_free_buffers() is non-blocking.
 */
static inline int buffer_busy(struct buffer_head *bh)
{
	return atomic_read(&bh->b_count) |
		(bh->b_state & ((1 << BH_Dirty) | (1 << BH_Lock)));
}

static int
drop_buffers(struct page *page, struct buffer_head **buffers_to_free)
{
	struct buffer_head *head = page_buffers(page);
	struct buffer_head *bh;

	bh = head;
	do {
		if (buffer_write_io_error(bh) && page->mapping)
			set_bit(AS_EIO, &page->mapping->flags);
		if (buffer_busy(bh))
			goto failed;
		bh = bh->b_this_page;
	} while (bh != head);

	do {
		struct buffer_head *next = bh->b_this_page;

		if (bh->b_assoc_map)
			__remove_assoc_queue(bh);
		bh = next;
	} while (bh != head);
	*buffers_to_free = head;
	__clear_page_buffers(page);
	return 1;
failed:
	return 0;
}

int try_to_free_buffers(struct page *page)
{
	struct address_space * const mapping = page->mapping;
	struct buffer_head *buffers_to_free = NULL;
	int ret = 0;

	BUG_ON(!PageLocked(page));
	if (PageWriteback(page))
		return 0;

	if (mapping == NULL) {		/* can this still happen? */
		ret = drop_buffers(page, &buffers_to_free);
		goto out;
	}

	spin_lock(&mapping->private_lock);
	ret = drop_buffers(page, &buffers_to_free);

	/*
	 * If the filesystem writes its buffers by hand (eg ext3)
	 * then we can have clean buffers against a dirty page.  We
	 * clean the page here; otherwise the VM will never notice
	 * that the filesystem did any IO at all.
	 *
	 * Also, during truncate, discard_buffer will have marked all
	 * the page's buffers clean.  We discover that here and clean
	 * the page also.
	 *
	 * private_lock must be held over this entire operation in order
	 * to synchronise against __set_page_dirty_buffers and prevent the
	 * dirty bit from being lost.
	 */
	if (ret)
		cancel_dirty_page(page);
	spin_unlock(&mapping->private_lock);
out:
	if (buffers_to_free) {
		struct buffer_head *bh = buffers_to_free;

		do {
			struct buffer_head *next = bh->b_this_page;
			free_buffer_head(bh);
			bh = next;
		} while (bh != buffers_to_free);
	}
	return ret;
}
EXPORT_SYMBOL(try_to_free_buffers);
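
/*
 * Illustrative sketch (not part of the original source): a plain
 * buffer-backed filesystem's ->releasepage usually just defers to
 * try_to_free_buffers(), which is what try_to_release_page() does by
 * default when no ->releasepage is provided.
 */
#if 0
static int foo_releasepage(struct page *page, gfp_t gfp_mask)
{
	return try_to_free_buffers(page);
}
#endif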

/*
 * There are no bdflush tunables left.  But distributions are
 * still running obsolete flush daemons, so we terminate them here.
 *
 * Use of bdflush() is deprecated and will be removed in a future kernel.
 * The `flush-X' kernel threads fully replace bdflush daemons and this call.
 */
SYSCALL_DEFINE2(bdflush, int, func, long, data)
{
	static int msg_count;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	if (msg_count < 5) {
		msg_count++;
		printk(KERN_INFO
			"warning: process `%s' used the obsolete bdflush"
			" system call\n", current->comm);
		printk(KERN_INFO "Fix your initscripts?\n");
	}

	if (func == 1)
		do_exit(0);
	return 0;
}

/*
 * Buffer-head allocation
 */
static struct kmem_cache *bh_cachep __read_mostly;

/*
 * Once the number of bh's in the machine exceeds this level, we start
 * stripping them in writeback.
 */
static unsigned long max_buffer_heads;

int buffer_heads_over_limit;

struct bh_accounting {
	int nr;			/* Number of live bh's */
	int ratelimit;		/* Limit cacheline bouncing */
};

static DEFINE_PER_CPU(struct bh_accounting, bh_accounting) = {0, 0};

static void recalc_bh_state(void)
{
	int i;
	int tot = 0;

	/*
	 * Fold the per-cpu counters into buffer_heads_over_limit only once
	 * every 4096 allocations/frees, to keep the fast path cheap.
	 */
	if (__this_cpu_inc_return(bh_accounting.ratelimit) - 1 < 4096)
		return;
	__this_cpu_write(bh_accounting.ratelimit, 0);
	for_each_online_cpu(i)
		tot += per_cpu(bh_accounting, i).nr;
	buffer_heads_over_limit = (tot > max_buffer_heads);
}

struct buffer_head *alloc_buffer_head(gfp_t gfp_flags)
{
	struct buffer_head *ret = kmem_cache_zalloc(bh_cachep, gfp_flags);
	if (ret) {
		INIT_LIST_HEAD(&ret->b_assoc_buffers);
		preempt_disable();
		__this_cpu_inc(bh_accounting.nr);
		recalc_bh_state();
		preempt_enable();
	}
	return ret;
}
EXPORT_SYMBOL(alloc_buffer_head);

void free_buffer_head(struct buffer_head *bh)
{
	BUG_ON(!list_empty(&bh->b_assoc_buffers));
	kmem_cache_free(bh_cachep, bh);
	preempt_disable();
	__this_cpu_dec(bh_accounting.nr);
	recalc_bh_state();
	preempt_enable();
}
EXPORT_SYMBOL(free_buffer_head);

static void buffer_exit_cpu(int cpu)
{
	int i;
	struct bh_lru *b = &per_cpu(bh_lrus, cpu);

	/* Release the dead CPU's bh LRU and fold its accounting over. */
	for (i = 0; i < BH_LRU_SIZE; i++) {
		brelse(b->bhs[i]);
		b->bhs[i] = NULL;
	}
	this_cpu_add(bh_accounting.nr, per_cpu(bh_accounting, cpu).nr);
	per_cpu(bh_accounting, cpu).nr = 0;
}

static int buffer_cpu_notify(struct notifier_block *self,
			     unsigned long action, void *hcpu)
{
	if (action == CPU_DEAD || action == CPU_DEAD_FROZEN)
		buffer_exit_cpu((unsigned long)hcpu);
	return NOTIFY_OK;
}

/**
 * bh_uptodate_or_lock - Test whether the buffer is uptodate
 * @bh: struct buffer_head
 *
 * Return true if the buffer is up-to-date and false,
 * with the buffer locked, if not.
 */
int bh_uptodate_or_lock(struct buffer_head *bh)
{
	if (!buffer_uptodate(bh)) {
		lock_buffer(bh);
		if (!buffer_uptodate(bh))
			return 0;
		unlock_buffer(bh);
	}
	return 1;
}
EXPORT_SYMBOL(bh_uptodate_or_lock);

/**
 * bh_submit_read - Submit a locked buffer for reading
 * @bh: struct buffer_head
 *
 * Returns zero on success and -EIO on error.
 */
int bh_submit_read(struct buffer_head *bh)
{
	BUG_ON(!buffer_locked(bh));

	if (buffer_uptodate(bh)) {
		unlock_buffer(bh);
		return 0;
	}

	get_bh(bh);
	bh->b_end_io = end_buffer_read_sync;
	submit_bh(READ, bh);
	wait_on_buffer(bh);
	if (buffer_uptodate(bh))
		return 0;
	return -EIO;
}
EXPORT_SYMBOL(bh_submit_read);
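
/*
 * Illustrative sketch (not part of the original source): the two helpers
 * above combine into the common "read unless cached" idiom.
 * foo_read_block() is a hypothetical name.
 */
#if 0
static int foo_read_block(struct buffer_head *bh)
{
	if (bh_uptodate_or_lock(bh))
		return 0;		/* already up to date */
	return bh_submit_read(bh);	/* bh is locked; read and wait */
}
#endif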

void __init buffer_init(void)
{
	unsigned long nrpages;

	bh_cachep = kmem_cache_create("buffer_head",
			sizeof(struct buffer_head), 0,
				(SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|
				SLAB_MEM_SPREAD),
				NULL);

	/*
	 * Limit the bh occupancy to 10% of ZONE_NORMAL
	 */
	nrpages = (nr_free_buffer_pages() * 10) / 100;
	max_buffer_heads = nrpages * (PAGE_SIZE / sizeof(struct buffer_head));
	hotcpu_notifier(buffer_cpu_notify, 0);
}