/*
 *  linux/fs/buffer.c
 *
 *  Copyright (C) 1991, 1992, 2002  Linus Torvalds
 */

#include <linux/kernel.h>
#include <linux/syscalls.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/percpu.h>
#include <linux/slab.h>
#include <linux/capability.h>
#include <linux/blkdev.h>
#include <linux/file.h>
#include <linux/quotaops.h>
#include <linux/highmem.h>
#include <linux/export.h>
#include <linux/backing-dev.h>
#include <linux/writeback.h>
#include <linux/hash.h>
#include <linux/suspend.h>
#include <linux/buffer_head.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/bio.h>
#include <linux/notifier.h>
#include <linux/cpu.h>
#include <linux/bitops.h>
#include <linux/mpage.h>
#include <linux/bit_spinlock.h>
#include <trace/events/block.h>

static int fsync_buffers_list(spinlock_t *lock, struct list_head *list);
static int submit_bh_wbc(int rw, struct buffer_head *bh,
			 unsigned long bio_flags,
			 struct writeback_control *wbc);

#define BH_ENTRY(list) list_entry((list), struct buffer_head, b_assoc_buffers)
53
54void init_buffer(struct buffer_head *bh, bh_end_io_t *handler, void *private)
55{
56 bh->b_end_io = handler;
57 bh->b_private = private;
58}
59EXPORT_SYMBOL(init_buffer);
60
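/*
 * Mark the page that backs this buffer as recently accessed
 * (mark_page_accessed()) so that page reclaim prefers to keep it around.
 */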
61inline void touch_buffer(struct buffer_head *bh)
62{
63 trace_block_touch_buffer(bh);
64 mark_page_accessed(bh->b_page);
65}
66EXPORT_SYMBOL(touch_buffer);
67
68void __lock_buffer(struct buffer_head *bh)
69{
70 wait_on_bit_lock_io(&bh->b_state, BH_Lock, TASK_UNINTERRUPTIBLE);
71}
72EXPORT_SYMBOL(__lock_buffer);
73
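/*
 * Clear BH_Lock and wake up anyone sleeping in __wait_on_buffer() or
 * __lock_buffer().  The barrier after the atomic clear orders the bit
 * update before the waitqueue check inside wake_up_bit().
 */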
74void unlock_buffer(struct buffer_head *bh)
75{
76 clear_bit_unlock(BH_Lock, &bh->b_state);
77 smp_mb__after_atomic();
78 wake_up_bit(&bh->b_state, BH_Lock);
79}
80EXPORT_SYMBOL(unlock_buffer);

/*
 * Returns if the page has dirty or writeback buffers. If all the buffers
 * are unlocked and clean then the PageDirty information is stale. If
 * any of the pages are locked, it is assumed they are locked for IO.
 */
87void buffer_check_dirty_writeback(struct page *page,
88 bool *dirty, bool *writeback)
89{
90 struct buffer_head *head, *bh;
91 *dirty = false;
92 *writeback = false;
93
94 BUG_ON(!PageLocked(page));
95
96 if (!page_has_buffers(page))
97 return;
98
99 if (PageWriteback(page))
100 *writeback = true;
101
102 head = page_buffers(page);
103 bh = head;
104 do {
105 if (buffer_locked(bh))
106 *writeback = true;
107
108 if (buffer_dirty(bh))
109 *dirty = true;
110
111 bh = bh->b_this_page;
112 } while (bh != head);
113}
114EXPORT_SYMBOL(buffer_check_dirty_writeback);

/*
 * Block until a buffer comes unlocked.  This doesn't stop it
 * being used by a driver - you can't use it for that.
 *
 * It's OK to be here on a buffer which is actually under I/O.  It
 * doesn't matter - we just need __wait_on_buffer to complete.
 */
121void __wait_on_buffer(struct buffer_head * bh)
122{
123 wait_on_bit_io(&bh->b_state, BH_Lock, TASK_UNINTERRUPTIBLE);
124}
125EXPORT_SYMBOL(__wait_on_buffer);
126
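/*
 * Detach the buffer ring from a page: clear PG_private, zero
 * page->private and drop the page reference which was taken when the
 * buffers were attached.
 */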
127static void
128__clear_page_buffers(struct page *page)
129{
130 ClearPagePrivate(page);
131 set_page_private(page, 0);
132 page_cache_release(page);
133}
134
135static void buffer_io_error(struct buffer_head *bh, char *msg)
136{
137 char b[BDEVNAME_SIZE];
138
139 if (!test_bit(BH_Quiet, &bh->b_state))
140 printk_ratelimited(KERN_ERR
141 "Buffer I/O error on dev %s, logical block %llu%s\n",
142 bdevname(bh->b_bdev, b),
143 (unsigned long long)bh->b_blocknr, msg);
144}

/*
 * End-of-IO handler helper function which does not touch the bh after
 * unlocking it.
 * Note: unlock_buffer() sort-of does touch the bh after unlocking it, but
 * a race there is benign: unlock_buffer() only uses the bh's address for
 * hashing after unlocking the buffer, so it doesn't actually touch the bh
 * itself.
 */
154static void __end_buffer_read_notouch(struct buffer_head *bh, int uptodate)
155{
156 if (uptodate) {
157 set_buffer_uptodate(bh);
158 } else {
159
160 clear_buffer_uptodate(bh);
161 }
162 unlock_buffer(bh);
163}

/*
 * Default synchronous end-of-IO handler.  Just mark it up-to-date and
 * unlock the buffer.  This is what ll_rw_block uses too.
 */
169void end_buffer_read_sync(struct buffer_head *bh, int uptodate)
170{
171 __end_buffer_read_notouch(bh, uptodate);
172 put_bh(bh);
173}
174EXPORT_SYMBOL(end_buffer_read_sync);
175
176void end_buffer_write_sync(struct buffer_head *bh, int uptodate)
177{
178 if (uptodate) {
179 set_buffer_uptodate(bh);
180 } else {
181 buffer_io_error(bh, ", lost sync page write");
182 set_buffer_write_io_error(bh);
183 clear_buffer_uptodate(bh);
184 }
185 unlock_buffer(bh);
186 put_bh(bh);
187}
188EXPORT_SYMBOL(end_buffer_write_sync);

/*
 * Various filesystems appear to want __find_get_block to be non-blocking.
 * But it's the page lock which protects the buffers.  To get around this,
 * we get exclusion from try_to_free_buffers with the blockdev mapping's
 * private_lock.
 *
 * Hack idea: for the blockdev mapping, private_lock contention
 * may be quite high.  This code could TryLock the page, and if that
 * succeeds, there is no need to take private_lock.
 */
201static struct buffer_head *
202__find_get_block_slow(struct block_device *bdev, sector_t block)
203{
204 struct inode *bd_inode = bdev->bd_inode;
205 struct address_space *bd_mapping = bd_inode->i_mapping;
206 struct buffer_head *ret = NULL;
207 pgoff_t index;
208 struct buffer_head *bh;
209 struct buffer_head *head;
210 struct page *page;
211 int all_mapped = 1;
212
213 index = block >> (PAGE_CACHE_SHIFT - bd_inode->i_blkbits);
214 page = find_get_page_flags(bd_mapping, index, FGP_ACCESSED);
215 if (!page)
216 goto out;
217
218 spin_lock(&bd_mapping->private_lock);
219 if (!page_has_buffers(page))
220 goto out_unlock;
221 head = page_buffers(page);
222 bh = head;
223 do {
224 if (!buffer_mapped(bh))
225 all_mapped = 0;
226 else if (bh->b_blocknr == block) {
227 ret = bh;
228 get_bh(bh);
229 goto out_unlock;
230 }
231 bh = bh->b_this_page;
232 } while (bh != head);

	/*
	 * We get here when every buffer on the page is mapped but none of
	 * them carries the block we asked for - the page cache and the
	 * caller disagree about the block layout.  Log enough state to
	 * debug the mismatch.
	 */
239 if (all_mapped) {
240 char b[BDEVNAME_SIZE];
241
242 printk("__find_get_block_slow() failed. "
243 "block=%llu, b_blocknr=%llu\n",
244 (unsigned long long)block,
245 (unsigned long long)bh->b_blocknr);
246 printk("b_state=0x%08lx, b_size=%zu\n",
247 bh->b_state, bh->b_size);
248 printk("device %s blocksize: %d\n", bdevname(bdev, b),
249 1 << bd_inode->i_blkbits);
250 }
251out_unlock:
252 spin_unlock(&bd_mapping->private_lock);
253 page_cache_release(page);
254out:
255 return ret;
256}

/*
 * Kick the writeback threads then try to free up some ZONE_NORMAL memory.
 */
261static void free_more_memory(void)
262{
263 struct zone *zone;
264 int nid;
265
266 wakeup_flusher_threads(1024, WB_REASON_FREE_MORE_MEM);
267 yield();
268
269 for_each_online_node(nid) {
270 (void)first_zones_zonelist(node_zonelist(nid, GFP_NOFS),
271 gfp_zone(GFP_NOFS), NULL,
272 &zone);
273 if (zone)
274 try_to_free_pages(node_zonelist(nid, GFP_NOFS), 0,
275 GFP_NOFS, NULL);
276 }
277}

/*
 * I/O completion handler for block_read_full_page() - pages
 * which come unlocked at the end of I/O.
 */
283static void end_buffer_async_read(struct buffer_head *bh, int uptodate)
284{
285 unsigned long flags;
286 struct buffer_head *first;
287 struct buffer_head *tmp;
288 struct page *page;
289 int page_uptodate = 1;
290
291 BUG_ON(!buffer_async_read(bh));
292
293 page = bh->b_page;
294 if (uptodate) {
295 set_buffer_uptodate(bh);
296 } else {
297 clear_buffer_uptodate(bh);
298 buffer_io_error(bh, ", async page read");
299 SetPageError(page);
300 }

	/*
	 * Be _very_ careful from here on. Bad things can happen if
	 * two buffer heads end IO at almost the same time and both
	 * decide that the page is now completely done.
	 */
307 first = page_buffers(page);
308 local_irq_save(flags);
309 bit_spin_lock(BH_Uptodate_Lock, &first->b_state);
310 clear_buffer_async_read(bh);
311 unlock_buffer(bh);
312 tmp = bh;
313 do {
314 if (!buffer_uptodate(tmp))
315 page_uptodate = 0;
316 if (buffer_async_read(tmp)) {
317 BUG_ON(!buffer_locked(tmp));
318 goto still_busy;
319 }
320 tmp = tmp->b_this_page;
321 } while (tmp != bh);
322 bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
323 local_irq_restore(flags);

	/*
	 * If none of the buffers had errors and they are all
	 * uptodate then we can set the page uptodate.
	 */
329 if (page_uptodate && !PageError(page))
330 SetPageUptodate(page);
331 unlock_page(page);
332 return;
333
334still_busy:
335 bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
336 local_irq_restore(flags);
337 return;
338}

/*
 * Completion handler for block_write_full_page() - pages which are unlocked
 * during I/O, and which have PageWriteback cleared upon I/O completion.
 */
344void end_buffer_async_write(struct buffer_head *bh, int uptodate)
345{
346 unsigned long flags;
347 struct buffer_head *first;
348 struct buffer_head *tmp;
349 struct page *page;
350
351 BUG_ON(!buffer_async_write(bh));
352
353 page = bh->b_page;
354 if (uptodate) {
355 set_buffer_uptodate(bh);
356 } else {
357 buffer_io_error(bh, ", lost async page write");
358 set_bit(AS_EIO, &page->mapping->flags);
359 set_buffer_write_io_error(bh);
360 clear_buffer_uptodate(bh);
361 SetPageError(page);
362 }
363
364 first = page_buffers(page);
365 local_irq_save(flags);
366 bit_spin_lock(BH_Uptodate_Lock, &first->b_state);
367
368 clear_buffer_async_write(bh);
369 unlock_buffer(bh);
370 tmp = bh->b_this_page;
371 while (tmp != bh) {
372 if (buffer_async_write(tmp)) {
373 BUG_ON(!buffer_locked(tmp));
374 goto still_busy;
375 }
376 tmp = tmp->b_this_page;
377 }
378 bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
379 local_irq_restore(flags);
380 end_page_writeback(page);
381 return;
382
383still_busy:
384 bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
385 local_irq_restore(flags);
386 return;
387}
388EXPORT_SYMBOL(end_buffer_async_write);

/*
 * If a page's buffers are under async readin (end_buffer_async_read
 * completion) then there is a possibility that another thread of
 * control could lock one of the buffers after it has completed
 * but while some of the other buffers have not completed.  This
 * locked buffer would confuse end_buffer_async_read() into not unlocking
 * the page.  So the absence of BH_Async_Read tells end_buffer_async_read()
 * that this buffer is not under async I/O.
 *
 * The page comes unlocked when it has no locked buffer_async buffers
 * left.
 *
 * PageLocked prevents anyone starting new async I/O reads any of
 * the buffers.
 *
 * PageWriteback is used to prevent simultaneous writeout of the same
 * page.
 *
 * PageLocked prevents anyone from starting writeback of a page which is
 * under read I/O (PageWriteback is only ever set against a locked page).
 */
411static void mark_buffer_async_read(struct buffer_head *bh)
412{
413 bh->b_end_io = end_buffer_async_read;
414 set_buffer_async_read(bh);
415}
416
417static void mark_buffer_async_write_endio(struct buffer_head *bh,
418 bh_end_io_t *handler)
419{
420 bh->b_end_io = handler;
421 set_buffer_async_write(bh);
422}
423
424void mark_buffer_async_write(struct buffer_head *bh)
425{
426 mark_buffer_async_write_endio(bh, end_buffer_async_write);
427}
428EXPORT_SYMBOL(mark_buffer_async_write);

/*
 * The private_list machinery.
 *
 * A filesystem can attach "associated" buffers - typically metadata such
 * as indirect blocks that must be written before an fsync() of the data
 * can be considered complete - to an inode's mapping->private_list via
 * mark_buffer_dirty_inode().  The list is protected by the private_lock
 * of the blockdev mapping which actually backs those buffers, and that
 * blockdev mapping is remembered in mapping->private_data.
 *
 * sync_mapping_buffers() writes out and waits upon that list, and
 * invalidate_inode_buffers() / remove_inode_buffers() detach buffers from
 * it again.  A buffer records the mapping it is associated with in
 * bh->b_assoc_map, and b_assoc_buffers is the list linkage.
 */

/*
 * The buffer's backing address_space's private_lock must be held.
 */
483static void __remove_assoc_queue(struct buffer_head *bh)
484{
485 list_del_init(&bh->b_assoc_buffers);
486 WARN_ON(!bh->b_assoc_map);
487 if (buffer_write_io_error(bh))
488 set_bit(AS_EIO, &bh->b_assoc_map->flags);
489 bh->b_assoc_map = NULL;
490}
491
492int inode_has_buffers(struct inode *inode)
493{
494 return !list_empty(&inode->i_data.private_list);
495}

/*
 * osync is designed to support O_SYNC io.  It waits synchronously for
 * all already-submitted IO to complete, but does not queue any new
 * writes to the disk.
 *
 * To do O_SYNC writes, just queue the buffer writes with ll_rw_block as
 * you dirty the buffers, and then use osync_inode_buffers to wait for
 * completion.  Any other dirty buffers which are not yet queued for
 * write will not be flushed to disk by the osync.
 */
507static int osync_buffers_list(spinlock_t *lock, struct list_head *list)
508{
509 struct buffer_head *bh;
510 struct list_head *p;
511 int err = 0;
512
513 spin_lock(lock);
514repeat:
515 list_for_each_prev(p, list) {
516 bh = BH_ENTRY(p);
517 if (buffer_locked(bh)) {
518 get_bh(bh);
519 spin_unlock(lock);
520 wait_on_buffer(bh);
521 if (!buffer_uptodate(bh))
522 err = -EIO;
523 brelse(bh);
524 spin_lock(lock);
525 goto repeat;
526 }
527 }
528 spin_unlock(lock);
529 return err;
530}
531
532static void do_thaw_one(struct super_block *sb, void *unused)
533{
534 char b[BDEVNAME_SIZE];
535 while (sb->s_bdev && !thaw_bdev(sb->s_bdev, sb))
536 printk(KERN_WARNING "Emergency Thaw on %s\n",
537 bdevname(sb->s_bdev, b));
538}
539
540static void do_thaw_all(struct work_struct *work)
541{
542 iterate_supers(do_thaw_one, NULL);
543 kfree(work);
544 printk(KERN_WARNING "Emergency Thaw complete\n");
545}

/**
 * emergency_thaw_all -- forcibly thaw every frozen filesystem
 *
 * Used for emergency unfreeze of all filesystems via SysRq.
 */
552void emergency_thaw_all(void)
553{
554 struct work_struct *work;
555
556 work = kmalloc(sizeof(*work), GFP_ATOMIC);
557 if (work) {
558 INIT_WORK(work, do_thaw_all);
559 schedule_work(work);
560 }
561}

/**
 * sync_mapping_buffers - write out & wait upon a mapping's "associated" buffers
 * @mapping: the mapping which wants those buffers written
 *
 * Starts I/O against the buffers at mapping->private_list, and waits upon
 * that I/O.
 *
 * Basically, this is a convenience function for fsync().
 * @mapping is a file or directory which needs those buffers to be written for
 * a successful fsync().
 */
574int sync_mapping_buffers(struct address_space *mapping)
575{
576 struct address_space *buffer_mapping = mapping->private_data;
577
578 if (buffer_mapping == NULL || list_empty(&mapping->private_list))
579 return 0;
580
581 return fsync_buffers_list(&buffer_mapping->private_lock,
582 &mapping->private_list);
583}
584EXPORT_SYMBOL(sync_mapping_buffers);

/*
 * Called when we've recently written block `bblock', and it is known that
 * `bblock' was for a buffer_boundary() buffer.  This means that the block at
 * `bblock + 1' is probably a dirty indirect block.  Hunt it down and, if it's
 * dirty, schedule it for IO.  So that indirects merge nicely with their data.
 */
592void write_boundary_block(struct block_device *bdev,
593 sector_t bblock, unsigned blocksize)
594{
595 struct buffer_head *bh = __find_get_block(bdev, bblock + 1, blocksize);
596 if (bh) {
597 if (buffer_dirty(bh))
598 ll_rw_block(WRITE, 1, &bh);
599 put_bh(bh);
600 }
601}
602
603void mark_buffer_dirty_inode(struct buffer_head *bh, struct inode *inode)
604{
605 struct address_space *mapping = inode->i_mapping;
606 struct address_space *buffer_mapping = bh->b_page->mapping;
607
608 mark_buffer_dirty(bh);
609 if (!mapping->private_data) {
610 mapping->private_data = buffer_mapping;
611 } else {
612 BUG_ON(mapping->private_data != buffer_mapping);
613 }
614 if (!bh->b_assoc_map) {
615 spin_lock(&buffer_mapping->private_lock);
616 list_move_tail(&bh->b_assoc_buffers,
617 &mapping->private_list);
618 bh->b_assoc_map = mapping;
619 spin_unlock(&buffer_mapping->private_lock);
620 }
621}
622EXPORT_SYMBOL(mark_buffer_dirty_inode);

/*
 * Mark the page dirty, and set it dirty in the radix tree, and mark the inode
 * dirty.
 *
 * If warn is true, then emit a warning if the page is not uptodate and has
 * not been truncated.
 *
 * The caller must hold the mem_cgroup page-stat reference obtained from
 * mem_cgroup_begin_page_stat() across this call.
 */
633static void __set_page_dirty(struct page *page, struct address_space *mapping,
634 struct mem_cgroup *memcg, int warn)
635{
636 unsigned long flags;
637
638 spin_lock_irqsave(&mapping->tree_lock, flags);
639 if (page->mapping) {
640 WARN_ON_ONCE(warn && !PageUptodate(page));
641 account_page_dirtied(page, mapping, memcg);
642 radix_tree_tag_set(&mapping->page_tree,
643 page_index(page), PAGECACHE_TAG_DIRTY);
644 }
645 spin_unlock_irqrestore(&mapping->tree_lock, flags);
646}

/*
 * Add a page to the dirty page list.
 *
 * It is a sad fact of life that this function is called from several places
 * deeply under spinlocking.  It may not sleep.
 *
 * If the page has buffers, the uptodate buffers are set dirty, to preserve
 * dirty-state coherency between the page and the buffers.  If the page does
 * not have buffers then when they are later attached they will all be set
 * dirty.
 *
 * The buffers are dirtied before the page is dirtied.  There's a small race
 * window in which a writepage caller may see the page cleanness but not the
 * buffer dirtiness.  That's fine.  If this code were to set the page dirty
 * before the buffers, a concurrent writepage caller could clear the page
 * dirty bit, see a bunch of clean buffers and we'd end up with dirty
 * buffers/clean page on the dirty page list.
 *
 * We use private_lock to lock against try_to_free_buffers while using the
 * page's buffer list.  Also use this to protect against clean buffers being
 * added to the page after it was set dirty.
 */
673int __set_page_dirty_buffers(struct page *page)
674{
675 int newly_dirty;
676 struct mem_cgroup *memcg;
677 struct address_space *mapping = page_mapping(page);
678
679 if (unlikely(!mapping))
680 return !TestSetPageDirty(page);
681
682 spin_lock(&mapping->private_lock);
683 if (page_has_buffers(page)) {
684 struct buffer_head *head = page_buffers(page);
685 struct buffer_head *bh = head;
686
687 do {
688 set_buffer_dirty(bh);
689 bh = bh->b_this_page;
690 } while (bh != head);
691 }
692
693
694
695
696 memcg = mem_cgroup_begin_page_stat(page);
697 newly_dirty = !TestSetPageDirty(page);
698 spin_unlock(&mapping->private_lock);
699
700 if (newly_dirty)
701 __set_page_dirty(page, mapping, memcg, 1);
702
703 mem_cgroup_end_page_stat(memcg);
704
705 if (newly_dirty)
706 __mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
707
708 return newly_dirty;
709}
710EXPORT_SYMBOL(__set_page_dirty_buffers);

/*
 * Write out and wait upon a list of buffers.
 *
 * We have conflicting pressures: we want to make sure that all
 * initially dirty buffers get waited on, but that any subsequently
 * dirtied buffers don't.  After all, we don't want fsync to last
 * forever if somebody is actively writing to the file.
 *
 * Do this in two main stages: first we copy dirty buffers to a
 * temporary inode list, queueing the writes as we go.  Then we clean
 * up, waiting for those writes to complete.
 *
 * During this second stage, any subsequent updates to the file may end
 * up refiling the buffer on the original inode's dirty list again, so
 * there is a chance we will end up with a buffer queued for write but
 * not yet completed on that list.  So, as a final cleanup we go through
 * the osync code to catch these locked, dirty buffers without requeuing
 * any newly dirty buffers for write.
 */
731static int fsync_buffers_list(spinlock_t *lock, struct list_head *list)
732{
733 struct buffer_head *bh;
734 struct list_head tmp;
735 struct address_space *mapping;
736 int err = 0, err2;
737 struct blk_plug plug;
738
739 INIT_LIST_HEAD(&tmp);
740 blk_start_plug(&plug);
741
742 spin_lock(lock);
743 while (!list_empty(list)) {
744 bh = BH_ENTRY(list->next);
745 mapping = bh->b_assoc_map;
746 __remove_assoc_queue(bh);
747
748
749 smp_mb();
750 if (buffer_dirty(bh) || buffer_locked(bh)) {
751 list_add(&bh->b_assoc_buffers, &tmp);
752 bh->b_assoc_map = mapping;
753 if (buffer_dirty(bh)) {
754 get_bh(bh);
755 spin_unlock(lock);
756
757
758
759
760
761
762
763 write_dirty_buffer(bh, WRITE_SYNC);
764
765
766
767
768
769
770
771 brelse(bh);
772 spin_lock(lock);
773 }
774 }
775 }
776
777 spin_unlock(lock);
778 blk_finish_plug(&plug);
779 spin_lock(lock);
780
781 while (!list_empty(&tmp)) {
782 bh = BH_ENTRY(tmp.prev);
783 get_bh(bh);
784 mapping = bh->b_assoc_map;
785 __remove_assoc_queue(bh);
786
787
788 smp_mb();
789 if (buffer_dirty(bh)) {
790 list_add(&bh->b_assoc_buffers,
791 &mapping->private_list);
792 bh->b_assoc_map = mapping;
793 }
794 spin_unlock(lock);
795 wait_on_buffer(bh);
796 if (!buffer_uptodate(bh))
797 err = -EIO;
798 brelse(bh);
799 spin_lock(lock);
800 }
801
802 spin_unlock(lock);
803 err2 = osync_buffers_list(lock, list);
804 if (err)
805 return err;
806 else
807 return err2;
808}

/*
 * Invalidate any and all dirty buffers on a given inode.  We are
 * probably unmounting the fs, but that doesn't mean the inode can't be
 * reused - it could be that somebody else is still referencing it.
 * We just drop the association: the buffers themselves stay on the
 * backing blockdev mapping and it's up to the caller to throw away
 * whatever data is still there.
 */
819void invalidate_inode_buffers(struct inode *inode)
820{
821 if (inode_has_buffers(inode)) {
822 struct address_space *mapping = &inode->i_data;
823 struct list_head *list = &mapping->private_list;
824 struct address_space *buffer_mapping = mapping->private_data;
825
826 spin_lock(&buffer_mapping->private_lock);
827 while (!list_empty(list))
828 __remove_assoc_queue(BH_ENTRY(list->next));
829 spin_unlock(&buffer_mapping->private_lock);
830 }
831}
832EXPORT_SYMBOL(invalidate_inode_buffers);

/*
 * Remove any clean buffers from the inode's buffer list.  This is called
 * when we're trying to free the inode itself.  Those buffers can pin it.
 *
 * Returns true if all buffers were removed.
 */
840int remove_inode_buffers(struct inode *inode)
841{
842 int ret = 1;
843
844 if (inode_has_buffers(inode)) {
845 struct address_space *mapping = &inode->i_data;
846 struct list_head *list = &mapping->private_list;
847 struct address_space *buffer_mapping = mapping->private_data;
848
849 spin_lock(&buffer_mapping->private_lock);
850 while (!list_empty(list)) {
851 struct buffer_head *bh = BH_ENTRY(list->next);
852 if (buffer_dirty(bh)) {
853 ret = 0;
854 break;
855 }
856 __remove_assoc_queue(bh);
857 }
858 spin_unlock(&buffer_mapping->private_lock);
859 }
860 return ret;
861}

/*
 * Create the appropriate buffers when given a page for data area and
 * the size of each buffer.  Use the bh->b_this_page linked list to
 * follow the buffers created.  Return NULL if unable to create more
 * buffers.
 *
 * The retry flag is used to differentiate async IO (paging, swapping)
 * which may not fail from ordinary buffer allocations.
 */
872struct buffer_head *alloc_page_buffers(struct page *page, unsigned long size,
873 int retry)
874{
875 struct buffer_head *bh, *head;
876 long offset;
877
878try_again:
879 head = NULL;
880 offset = PAGE_SIZE;
881 while ((offset -= size) >= 0) {
882 bh = alloc_buffer_head(GFP_NOFS);
883 if (!bh)
884 goto no_grow;
885
886 bh->b_this_page = head;
887 bh->b_blocknr = -1;
888 head = bh;
889
890 bh->b_size = size;
891
892
893 set_bh_page(bh, page, offset);
894 }
895 return head;
896
897
898
899no_grow:
900 if (head) {
901 do {
902 bh = head;
903 head = head->b_this_page;
904 free_buffer_head(bh);
905 } while (head);
906 }
907
908
909
910
911
912
913
914 if (!retry)
915 return NULL;
916
917
918
919
920
921
922
923 free_more_memory();
924 goto try_again;
925}
926EXPORT_SYMBOL_GPL(alloc_page_buffers);
927
928static inline void
929link_dev_buffers(struct page *page, struct buffer_head *head)
930{
931 struct buffer_head *bh, *tail;
932
933 bh = head;
934 do {
935 tail = bh;
936 bh = bh->b_this_page;
937 } while (bh);
938 tail->b_this_page = head;
939 attach_page_buffers(page, head);
940}
941
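/*
 * Size of the block device expressed in 'size'-byte blocks.  If the device
 * size is not yet known (zero), return the largest possible sector_t so
 * that callers treat every block as lying within the device.
 */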
942static sector_t blkdev_max_block(struct block_device *bdev, unsigned int size)
943{
944 sector_t retval = ~((sector_t)0);
945 loff_t sz = i_size_read(bdev->bd_inode);
946
947 if (sz) {
948 unsigned int sizebits = blksize_bits(size);
949 retval = (sz >> sizebits);
950 }
951 return retval;
952}
953
954
955
956
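/*
 * Initialise the state of a blockdev page's buffers: map each buffer to its
 * block number on the device, marking only blocks that lie within the device
 * as mapped.  Returns the first block number beyond the end of the device.
 */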
957static sector_t
958init_page_buffers(struct page *page, struct block_device *bdev,
959 sector_t block, int size)
960{
961 struct buffer_head *head = page_buffers(page);
962 struct buffer_head *bh = head;
963 int uptodate = PageUptodate(page);
964 sector_t end_block = blkdev_max_block(I_BDEV(bdev->bd_inode), size);
965
966 do {
967 if (!buffer_mapped(bh)) {
968 init_buffer(bh, NULL, NULL);
969 bh->b_bdev = bdev;
970 bh->b_blocknr = block;
971 if (uptodate)
972 set_buffer_uptodate(bh);
973 if (block < end_block)
974 set_buffer_mapped(bh);
975 }
976 block++;
977 bh = bh->b_this_page;
978 } while (bh != head);
979
980
981
982
983 return end_block;
984}
985
986
987
988
989
990
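/*
 * Create the page-cache page that contains the requested block and attach
 * buffers of the requested size to it.  Returns 1 if the block now has a
 * buffer, -ENXIO if the block lies beyond the end of the device, and 0 if
 * suitably sized buffers could not be attached.
 */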
991static int
992grow_dev_page(struct block_device *bdev, sector_t block,
993 pgoff_t index, int size, int sizebits, gfp_t gfp)
994{
995 struct inode *inode = bdev->bd_inode;
996 struct page *page;
997 struct buffer_head *bh;
998 sector_t end_block;
999 int ret = 0;
1000 gfp_t gfp_mask;
1001
1002 gfp_mask = mapping_gfp_constraint(inode->i_mapping, ~__GFP_FS) | gfp;
1003
1004
1005
1006
1007
1008
1009
1010 gfp_mask |= __GFP_NOFAIL;
1011
1012 page = find_or_create_page(inode->i_mapping, index, gfp_mask);
1013 if (!page)
1014 return ret;
1015
1016 BUG_ON(!PageLocked(page));
1017
1018 if (page_has_buffers(page)) {
1019 bh = page_buffers(page);
1020 if (bh->b_size == size) {
1021 end_block = init_page_buffers(page, bdev,
1022 (sector_t)index << sizebits,
1023 size);
1024 goto done;
1025 }
1026 if (!try_to_free_buffers(page))
1027 goto failed;
1028 }
1029
1030
1031
1032
1033 bh = alloc_page_buffers(page, size, 0);
1034 if (!bh)
1035 goto failed;
1036
1037
1038
1039
1040
1041
1042 spin_lock(&inode->i_mapping->private_lock);
1043 link_dev_buffers(page, bh);
1044 end_block = init_page_buffers(page, bdev, (sector_t)index << sizebits,
1045 size);
1046 spin_unlock(&inode->i_mapping->private_lock);
1047done:
1048 ret = (block < end_block) ? 1 : -ENXIO;
1049failed:
1050 unlock_page(page);
1051 page_cache_release(page);
1052 return ret;
1053}
1054
1055
1056
1057
1058
1059static int
1060grow_buffers(struct block_device *bdev, sector_t block, int size, gfp_t gfp)
1061{
1062 pgoff_t index;
1063 int sizebits;
1064
1065 sizebits = -1;
1066 do {
1067 sizebits++;
1068 } while ((size << sizebits) < PAGE_SIZE);
1069
1070 index = block >> sizebits;
1071
1072
1073
1074
1075
1076 if (unlikely(index != block >> sizebits)) {
1077 char b[BDEVNAME_SIZE];
1078
1079 printk(KERN_ERR "%s: requested out-of-range block %llu for "
1080 "device %s\n",
1081 __func__, (unsigned long long)block,
1082 bdevname(bdev, b));
1083 return -EIO;
1084 }
1085
1086
1087 return grow_dev_page(bdev, block, index, size, sizebits, gfp);
1088}
1089
1090struct buffer_head *
1091__getblk_slow(struct block_device *bdev, sector_t block,
1092 unsigned size, gfp_t gfp)
1093{
1094
1095 if (unlikely(size & (bdev_logical_block_size(bdev)-1) ||
1096 (size < 512 || size > PAGE_SIZE))) {
1097 printk(KERN_ERR "getblk(): invalid block size %d requested\n",
1098 size);
1099 printk(KERN_ERR "logical block size: %d\n",
1100 bdev_logical_block_size(bdev));
1101
1102 dump_stack();
1103 return NULL;
1104 }
1105
1106 for (;;) {
1107 struct buffer_head *bh;
1108 int ret;
1109
1110 bh = __find_get_block(bdev, block, size);
1111 if (bh)
1112 return bh;
1113
1114 ret = grow_buffers(bdev, block, size, gfp);
1115 if (ret < 0)
1116 return NULL;
1117 if (ret == 0)
1118 free_more_memory();
1119 }
1120}
1121EXPORT_SYMBOL(__getblk_slow);

/*
 * The relationship between dirty buffers and dirty pages:
 *
 * Whenever a page has any dirty buffers, the page's dirty bit is set, and
 * the page is tagged dirty in its address_space's radix tree.
 *
 * At all times, the dirtiness of the buffers represents the dirtiness of
 * subsections of the page.  If the page has buffers, the page dirty bit is
 * merely a hint about the true dirty state.
 *
 * When a page is set dirty in its entirety, all its buffers become dirty
 * (if the page has buffers).
 *
 * When a buffer is marked dirty, its page is dirtied, but the page's other
 * buffers are not.
 *
 * Also.  When blockdev buffers are explicitly read with bread(), they
 * individually become uptodate.  But their backing page remains not
 * uptodate - even if all of its buffers are uptodate.  A subsequent
 * block_read_full_page() against that page will discover all the uptodate
 * buffers, will set the page uptodate and will perform no I/O.
 */

/**
 * mark_buffer_dirty - mark a buffer_head as needing writeout
 * @bh: the buffer_head to mark dirty
 *
 * mark_buffer_dirty() will set the dirty bit against the buffer, then set
 * its backing page dirty, then tag the page as dirty in its address_space's
 * radix tree and then attach the address_space's inode to its superblock's
 * dirty inode list.
 *
 * mark_buffer_dirty() is atomic.  It takes bh->b_page->mapping->private_lock,
 * mapping->tree_lock and mapping->host->i_lock.
 */
1158void mark_buffer_dirty(struct buffer_head *bh)
1159{
1160 WARN_ON_ONCE(!buffer_uptodate(bh));
1161
1162 trace_block_dirty_buffer(bh);
1163
1164
1165
1166
1167
1168
1169
1170 if (buffer_dirty(bh)) {
1171 smp_mb();
1172 if (buffer_dirty(bh))
1173 return;
1174 }
1175
1176 if (!test_set_buffer_dirty(bh)) {
1177 struct page *page = bh->b_page;
1178 struct address_space *mapping = NULL;
1179 struct mem_cgroup *memcg;
1180
1181 memcg = mem_cgroup_begin_page_stat(page);
1182 if (!TestSetPageDirty(page)) {
1183 mapping = page_mapping(page);
1184 if (mapping)
1185 __set_page_dirty(page, mapping, memcg, 0);
1186 }
1187 mem_cgroup_end_page_stat(memcg);
1188 if (mapping)
1189 __mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
1190 }
1191}
1192EXPORT_SYMBOL(mark_buffer_dirty);
1193
1194
1195
1196
1197
1198
1199
1200
1201void __brelse(struct buffer_head * buf)
1202{
1203 if (atomic_read(&buf->b_count)) {
1204 put_bh(buf);
1205 return;
1206 }
1207 WARN(1, KERN_ERR "VFS: brelse: Trying to free free buffer\n");
1208}
1209EXPORT_SYMBOL(__brelse);
1210
1211
1212
1213
1214
1215void __bforget(struct buffer_head *bh)
1216{
1217 clear_buffer_dirty(bh);
1218 if (bh->b_assoc_map) {
1219 struct address_space *buffer_mapping = bh->b_page->mapping;
1220
1221 spin_lock(&buffer_mapping->private_lock);
1222 list_del_init(&bh->b_assoc_buffers);
1223 bh->b_assoc_map = NULL;
1224 spin_unlock(&buffer_mapping->private_lock);
1225 }
1226 __brelse(bh);
1227}
1228EXPORT_SYMBOL(__bforget);
1229
1230static struct buffer_head *__bread_slow(struct buffer_head *bh)
1231{
1232 lock_buffer(bh);
1233 if (buffer_uptodate(bh)) {
1234 unlock_buffer(bh);
1235 return bh;
1236 } else {
1237 get_bh(bh);
1238 bh->b_end_io = end_buffer_read_sync;
1239 submit_bh(READ, bh);
1240 wait_on_buffer(bh);
1241 if (buffer_uptodate(bh))
1242 return bh;
1243 }
1244 brelse(bh);
1245 return NULL;
1246}

/*
 * Per-cpu buffer LRU implementation.  To reduce the cost of __find_get_block().
 * The bhs[] array is sorted - newest buffer is at bhs[0].  Buffers have their
 * refcount elevated by one when they're in an LRU.  A buffer can only appear
 * once in a particular CPU's LRU.  A single buffer can be present in multiple
 * CPU's LRUs at the same time.
 *
 * This is a transparent caching front-end to sb_bread(), sb_getblk() and
 * sb_find_get_block().
 *
 * The LRUs themselves only need locking against invalidate_bh_lrus.  We use
 * a local interrupt disable for that.
 */
1262#define BH_LRU_SIZE 16
1263
1264struct bh_lru {
1265 struct buffer_head *bhs[BH_LRU_SIZE];
1266};
1267
1268static DEFINE_PER_CPU(struct bh_lru, bh_lrus) = {{ NULL }};
1269
1270#ifdef CONFIG_SMP
1271#define bh_lru_lock() local_irq_disable()
1272#define bh_lru_unlock() local_irq_enable()
1273#else
1274#define bh_lru_lock() preempt_disable()
1275#define bh_lru_unlock() preempt_enable()
1276#endif
1277
1278static inline void check_irqs_on(void)
1279{
1280#ifdef irqs_disabled
1281 BUG_ON(irqs_disabled());
1282#endif
1283}
1284
1285
1286
1287
1288static void bh_lru_install(struct buffer_head *bh)
1289{
1290 struct buffer_head *evictee = NULL;
1291
1292 check_irqs_on();
1293 bh_lru_lock();
1294 if (__this_cpu_read(bh_lrus.bhs[0]) != bh) {
1295 struct buffer_head *bhs[BH_LRU_SIZE];
1296 int in;
1297 int out = 0;
1298
1299 get_bh(bh);
1300 bhs[out++] = bh;
1301 for (in = 0; in < BH_LRU_SIZE; in++) {
1302 struct buffer_head *bh2 =
1303 __this_cpu_read(bh_lrus.bhs[in]);
1304
1305 if (bh2 == bh) {
1306 __brelse(bh2);
1307 } else {
1308 if (out >= BH_LRU_SIZE) {
1309 BUG_ON(evictee != NULL);
1310 evictee = bh2;
1311 } else {
1312 bhs[out++] = bh2;
1313 }
1314 }
1315 }
1316 while (out < BH_LRU_SIZE)
1317 bhs[out++] = NULL;
1318 memcpy(this_cpu_ptr(&bh_lrus.bhs), bhs, sizeof(bhs));
1319 }
1320 bh_lru_unlock();
1321
1322 if (evictee)
1323 __brelse(evictee);
1324}
1325
1326
1327
1328
1329static struct buffer_head *
1330lookup_bh_lru(struct block_device *bdev, sector_t block, unsigned size)
1331{
1332 struct buffer_head *ret = NULL;
1333 unsigned int i;
1334
1335 check_irqs_on();
1336 bh_lru_lock();
1337 for (i = 0; i < BH_LRU_SIZE; i++) {
1338 struct buffer_head *bh = __this_cpu_read(bh_lrus.bhs[i]);
1339
1340 if (bh && bh->b_blocknr == block && bh->b_bdev == bdev &&
1341 bh->b_size == size) {
1342 if (i) {
1343 while (i) {
1344 __this_cpu_write(bh_lrus.bhs[i],
1345 __this_cpu_read(bh_lrus.bhs[i - 1]));
1346 i--;
1347 }
1348 __this_cpu_write(bh_lrus.bhs[0], bh);
1349 }
1350 get_bh(bh);
1351 ret = bh;
1352 break;
1353 }
1354 }
1355 bh_lru_unlock();
1356 return ret;
1357}
1358
1359
1360
1361
1362
1363
1364struct buffer_head *
1365__find_get_block(struct block_device *bdev, sector_t block, unsigned size)
1366{
1367 struct buffer_head *bh = lookup_bh_lru(bdev, block, size);
1368
1369 if (bh == NULL) {
1370
1371 bh = __find_get_block_slow(bdev, block);
1372 if (bh)
1373 bh_lru_install(bh);
1374 } else
1375 touch_buffer(bh);
1376
1377 return bh;
1378}
1379EXPORT_SYMBOL(__find_get_block);
1380
1381
1382
1383
1384
1385
1386
1387
1388
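/*
 * __getblk_gfp() will locate (and, if necessary, create) the buffer_head
 * which corresponds to the passed block_device, block and size.  The
 * returned buffer has its reference count incremented.
 */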
1389struct buffer_head *
1390__getblk_gfp(struct block_device *bdev, sector_t block,
1391 unsigned size, gfp_t gfp)
1392{
1393 struct buffer_head *bh = __find_get_block(bdev, block, size);
1394
1395 might_sleep();
1396 if (bh == NULL)
1397 bh = __getblk_slow(bdev, block, size, gfp);
1398 return bh;
1399}
1400EXPORT_SYMBOL(__getblk_gfp);
1401
1402
1403
1404
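/*
 * Do a read-ahead of one block: start the read asynchronously and drop our
 * reference immediately, relying on the bh LRU / page cache to keep the
 * data around for a later __bread().
 */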
1405void __breadahead(struct block_device *bdev, sector_t block, unsigned size)
1406{
1407 struct buffer_head *bh = __getblk(bdev, block, size);
1408 if (likely(bh)) {
1409 ll_rw_block(READA, 1, &bh);
1410 brelse(bh);
1411 }
1412}
1413EXPORT_SYMBOL(__breadahead);
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
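/**
 *  __bread_gfp() - reads a specified block and returns the bh
 *  @bdev: the block_device to read from
 *  @block: number of block
 *  @size: size (in bytes) to read
 *  @gfp: page allocation flag
 *
 *  Reads a specified block, and returns the buffer head that contains it.
 *  The page cache is allocated according to @gfp, so it can be made
 *  non-movable if the caller requires that.
 *  It returns NULL if the block was unreadable.
 */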
1427struct buffer_head *
1428__bread_gfp(struct block_device *bdev, sector_t block,
1429 unsigned size, gfp_t gfp)
1430{
1431 struct buffer_head *bh = __getblk_gfp(bdev, block, size, gfp);
1432
1433 if (likely(bh) && !buffer_uptodate(bh))
1434 bh = __bread_slow(bh);
1435 return bh;
1436}
1437EXPORT_SYMBOL(__bread_gfp);
1438
1439
1440
1441
1442
1443
1444static void invalidate_bh_lru(void *arg)
1445{
1446 struct bh_lru *b = &get_cpu_var(bh_lrus);
1447 int i;
1448
1449 for (i = 0; i < BH_LRU_SIZE; i++) {
1450 brelse(b->bhs[i]);
1451 b->bhs[i] = NULL;
1452 }
1453 put_cpu_var(bh_lrus);
1454}
1455
1456static bool has_bh_in_lru(int cpu, void *dummy)
1457{
1458 struct bh_lru *b = per_cpu_ptr(&bh_lrus, cpu);
1459 int i;
1460
1461 for (i = 0; i < BH_LRU_SIZE; i++) {
1462 if (b->bhs[i])
1463 return 1;
1464 }
1465
1466 return 0;
1467}
1468
1469void invalidate_bh_lrus(void)
1470{
1471 on_each_cpu_cond(has_bh_in_lru, invalidate_bh_lru, NULL, 1, GFP_KERNEL);
1472}
1473EXPORT_SYMBOL_GPL(invalidate_bh_lrus);
1474
1475void set_bh_page(struct buffer_head *bh,
1476 struct page *page, unsigned long offset)
1477{
1478 bh->b_page = page;
1479 BUG_ON(offset >= PAGE_SIZE);
1480 if (PageHighMem(page))
1481
1482
1483
1484 bh->b_data = (char *)(0 + offset);
1485 else
1486 bh->b_data = page_address(page) + offset;
1487}
1488EXPORT_SYMBOL(set_bh_page);
1489
1490
1491
1492
1493
1494
1495#define BUFFER_FLAGS_DISCARD \
1496 (1 << BH_Mapped | 1 << BH_New | 1 << BH_Req | \
1497 1 << BH_Delay | 1 << BH_Unwritten)
1498
1499static void discard_buffer(struct buffer_head * bh)
1500{
1501 unsigned long b_state, b_state_old;
1502
1503 lock_buffer(bh);
1504 clear_buffer_dirty(bh);
1505 bh->b_bdev = NULL;
1506 b_state = bh->b_state;
1507 for (;;) {
1508 b_state_old = cmpxchg(&bh->b_state, b_state,
1509 (b_state & ~BUFFER_FLAGS_DISCARD));
1510 if (b_state_old == b_state)
1511 break;
1512 b_state = b_state_old;
1513 }
1514 unlock_buffer(bh);
1515}
1516
1517
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532
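/**
 * block_invalidatepage - invalidate part or all of a buffer-backed page
 * @page: the page which is affected
 * @offset: start of the range to invalidate
 * @length: length of the range to invalidate
 *
 * block_invalidatepage() is called when all or part of the page has become
 * invalidated by a truncate operation.
 *
 * block_invalidatepage() does not have to release all buffers, but it must
 * ensure that no dirty buffer is left outside @offset and that no I/O
 * is underway against any of the blocks which are outside the truncation
 * point, because the caller is about to free (and possibly reuse) those
 * blocks on-disk.
 */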
1533void block_invalidatepage(struct page *page, unsigned int offset,
1534 unsigned int length)
1535{
1536 struct buffer_head *head, *bh, *next;
1537 unsigned int curr_off = 0;
1538 unsigned int stop = length + offset;
1539
1540 BUG_ON(!PageLocked(page));
1541 if (!page_has_buffers(page))
1542 goto out;
1543
1544
1545
1546
1547 BUG_ON(stop > PAGE_CACHE_SIZE || stop < length);
1548
1549 head = page_buffers(page);
1550 bh = head;
1551 do {
1552 unsigned int next_off = curr_off + bh->b_size;
1553 next = bh->b_this_page;
1554
1555
1556
1557
1558 if (next_off > stop)
1559 goto out;
1560
1561
1562
1563
1564 if (offset <= curr_off)
1565 discard_buffer(bh);
1566 curr_off = next_off;
1567 bh = next;
1568 } while (bh != head);
1569
1570
1571
1572
1573
1574
1575 if (offset == 0)
1576 try_to_release_page(page, 0);
1577out:
1578 return;
1579}
1580EXPORT_SYMBOL(block_invalidatepage);
1581
1582
1583
1584
1585
1586
1587
1588void create_empty_buffers(struct page *page,
1589 unsigned long blocksize, unsigned long b_state)
1590{
1591 struct buffer_head *bh, *head, *tail;
1592
1593 head = alloc_page_buffers(page, blocksize, 1);
1594 bh = head;
1595 do {
1596 bh->b_state |= b_state;
1597 tail = bh;
1598 bh = bh->b_this_page;
1599 } while (bh);
1600 tail->b_this_page = head;
1601
1602 spin_lock(&page->mapping->private_lock);
1603 if (PageUptodate(page) || PageDirty(page)) {
1604 bh = head;
1605 do {
1606 if (PageDirty(page))
1607 set_buffer_dirty(bh);
1608 if (PageUptodate(page))
1609 set_buffer_uptodate(bh);
1610 bh = bh->b_this_page;
1611 } while (bh != head);
1612 }
1613 attach_page_buffers(page, head);
1614 spin_unlock(&page->mapping->private_lock);
1615}
1616EXPORT_SYMBOL(create_empty_buffers);
1617
1618
1619
1620
1621
1622
1623
1624
1625
1626
1627
1628
1629
1630
1631
1632
1633
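/*
 * unmap_underlying_metadata - discard a stale blockdev buffer
 *
 * When a filesystem allocates a disk block to file data, the block device's
 * page cache may still hold an old metadata buffer for that block.  Find it,
 * clean it and wait until no I/O is in flight against it, so that the stale
 * contents can never hit the disk after the new data.
 */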
1634void unmap_underlying_metadata(struct block_device *bdev, sector_t block)
1635{
1636 struct buffer_head *old_bh;
1637
1638 might_sleep();
1639
1640 old_bh = __find_get_block_slow(bdev, block);
1641 if (old_bh) {
1642 clear_buffer_dirty(old_bh);
1643 wait_on_buffer(old_bh);
1644 clear_buffer_req(old_bh);
1645 __brelse(old_bh);
1646 }
1647}
1648EXPORT_SYMBOL(unmap_underlying_metadata);
1649
1650
1651
1652
1653
1654
1655
1656
1657
1658static inline int block_size_bits(unsigned int blocksize)
1659{
1660 return ilog2(blocksize);
1661}
1662
1663static struct buffer_head *create_page_buffers(struct page *page, struct inode *inode, unsigned int b_state)
1664{
1665 BUG_ON(!PageLocked(page));
1666
1667 if (!page_has_buffers(page))
1668 create_empty_buffers(page, 1 << ACCESS_ONCE(inode->i_blkbits), b_state);
1669 return page_buffers(page);
1670}
1671
1672
1673
1674
1675
1676
1677
1678
1679
1680
1681
1682
1683
1684
1685
1686
1687
1688
1689
1690
1691
1692
1693
1694
1695
1696
1697
1698
1699
1700
1701static int __block_write_full_page(struct inode *inode, struct page *page,
1702 get_block_t *get_block, struct writeback_control *wbc,
1703 bh_end_io_t *handler)
1704{
1705 int err;
1706 sector_t block;
1707 sector_t last_block;
1708 struct buffer_head *bh, *head;
1709 unsigned int blocksize, bbits;
1710 int nr_underway = 0;
1711 int write_op = (wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE);
1712
1713 head = create_page_buffers(page, inode,
1714 (1 << BH_Dirty)|(1 << BH_Uptodate));
1715
1716
1717
1718
1719
1720
1721
1722
1723
1724
1725
1726 bh = head;
1727 blocksize = bh->b_size;
1728 bbits = block_size_bits(blocksize);
1729
1730 block = (sector_t)page->index << (PAGE_CACHE_SHIFT - bbits);
1731 last_block = (i_size_read(inode) - 1) >> bbits;
1732
1733
1734
1735
1736
1737 do {
1738 if (block > last_block) {
1739
1740
1741
1742
1743
1744
1745
1746
1747 clear_buffer_dirty(bh);
1748 set_buffer_uptodate(bh);
1749 } else if ((!buffer_mapped(bh) || buffer_delay(bh)) &&
1750 buffer_dirty(bh)) {
1751 WARN_ON(bh->b_size != blocksize);
1752 err = get_block(inode, block, bh, 1);
1753 if (err)
1754 goto recover;
1755 clear_buffer_delay(bh);
1756 if (buffer_new(bh)) {
1757
1758 clear_buffer_new(bh);
1759 unmap_underlying_metadata(bh->b_bdev,
1760 bh->b_blocknr);
1761 }
1762 }
1763 bh = bh->b_this_page;
1764 block++;
1765 } while (bh != head);
1766
1767 do {
1768 if (!buffer_mapped(bh))
1769 continue;
1770
1771
1772
1773
1774
1775
1776
1777 if (wbc->sync_mode != WB_SYNC_NONE) {
1778 lock_buffer(bh);
1779 } else if (!trylock_buffer(bh)) {
1780 redirty_page_for_writepage(wbc, page);
1781 continue;
1782 }
1783 if (test_clear_buffer_dirty(bh)) {
1784 mark_buffer_async_write_endio(bh, handler);
1785 } else {
1786 unlock_buffer(bh);
1787 }
1788 } while ((bh = bh->b_this_page) != head);
1789
1790
1791
1792
1793
1794 BUG_ON(PageWriteback(page));
1795 set_page_writeback(page);
1796
1797 do {
1798 struct buffer_head *next = bh->b_this_page;
1799 if (buffer_async_write(bh)) {
1800 submit_bh_wbc(write_op, bh, 0, wbc);
1801 nr_underway++;
1802 }
1803 bh = next;
1804 } while (bh != head);
1805 unlock_page(page);
1806
1807 err = 0;
1808done:
1809 if (nr_underway == 0) {
1810
1811
1812
1813
1814
1815 end_page_writeback(page);
1816
1817
1818
1819
1820
1821 }
1822 return err;
1823
1824recover:
1825
1826
1827
1828
1829
1830
1831 bh = head;
1832
1833 do {
1834 if (buffer_mapped(bh) && buffer_dirty(bh) &&
1835 !buffer_delay(bh)) {
1836 lock_buffer(bh);
1837 mark_buffer_async_write_endio(bh, handler);
1838 } else {
1839
1840
1841
1842
1843 clear_buffer_dirty(bh);
1844 }
1845 } while ((bh = bh->b_this_page) != head);
1846 SetPageError(page);
1847 BUG_ON(PageWriteback(page));
1848 mapping_set_error(page->mapping, err);
1849 set_page_writeback(page);
1850 do {
1851 struct buffer_head *next = bh->b_this_page;
1852 if (buffer_async_write(bh)) {
1853 clear_buffer_dirty(bh);
1854 submit_bh_wbc(write_op, bh, 0, wbc);
1855 nr_underway++;
1856 }
1857 bh = next;
1858 } while (bh != head);
1859 unlock_page(page);
1860 goto done;
1861}
1862
1863
1864
1865
1866
1867
1868void page_zero_new_buffers(struct page *page, unsigned from, unsigned to)
1869{
1870 unsigned int block_start, block_end;
1871 struct buffer_head *head, *bh;
1872
1873 BUG_ON(!PageLocked(page));
1874 if (!page_has_buffers(page))
1875 return;
1876
1877 bh = head = page_buffers(page);
1878 block_start = 0;
1879 do {
1880 block_end = block_start + bh->b_size;
1881
1882 if (buffer_new(bh)) {
1883 if (block_end > from && block_start < to) {
1884 if (!PageUptodate(page)) {
1885 unsigned start, size;
1886
1887 start = max(from, block_start);
1888 size = min(to, block_end) - start;
1889
1890 zero_user(page, start, size);
1891 set_buffer_uptodate(bh);
1892 }
1893
1894 clear_buffer_new(bh);
1895 mark_buffer_dirty(bh);
1896 }
1897 }
1898
1899 block_start = block_end;
1900 bh = bh->b_this_page;
1901 } while (bh != head);
1902}
1903EXPORT_SYMBOL(page_zero_new_buffers);
1904
1905int __block_write_begin(struct page *page, loff_t pos, unsigned len,
1906 get_block_t *get_block)
1907{
1908 unsigned from = pos & (PAGE_CACHE_SIZE - 1);
1909 unsigned to = from + len;
1910 struct inode *inode = page->mapping->host;
1911 unsigned block_start, block_end;
1912 sector_t block;
1913 int err = 0;
1914 unsigned blocksize, bbits;
1915 struct buffer_head *bh, *head, *wait[2], **wait_bh=wait;
1916
1917 BUG_ON(!PageLocked(page));
1918 BUG_ON(from > PAGE_CACHE_SIZE);
1919 BUG_ON(to > PAGE_CACHE_SIZE);
1920 BUG_ON(from > to);
1921
1922 head = create_page_buffers(page, inode, 0);
1923 blocksize = head->b_size;
1924 bbits = block_size_bits(blocksize);
1925
1926 block = (sector_t)page->index << (PAGE_CACHE_SHIFT - bbits);
1927
1928 for(bh = head, block_start = 0; bh != head || !block_start;
1929 block++, block_start=block_end, bh = bh->b_this_page) {
1930 block_end = block_start + blocksize;
1931 if (block_end <= from || block_start >= to) {
1932 if (PageUptodate(page)) {
1933 if (!buffer_uptodate(bh))
1934 set_buffer_uptodate(bh);
1935 }
1936 continue;
1937 }
1938 if (buffer_new(bh))
1939 clear_buffer_new(bh);
1940 if (!buffer_mapped(bh)) {
1941 WARN_ON(bh->b_size != blocksize);
1942 err = get_block(inode, block, bh, 1);
1943 if (err)
1944 break;
1945 if (buffer_new(bh)) {
1946 unmap_underlying_metadata(bh->b_bdev,
1947 bh->b_blocknr);
1948 if (PageUptodate(page)) {
1949 clear_buffer_new(bh);
1950 set_buffer_uptodate(bh);
1951 mark_buffer_dirty(bh);
1952 continue;
1953 }
1954 if (block_end > to || block_start < from)
1955 zero_user_segments(page,
1956 to, block_end,
1957 block_start, from);
1958 continue;
1959 }
1960 }
1961 if (PageUptodate(page)) {
1962 if (!buffer_uptodate(bh))
1963 set_buffer_uptodate(bh);
1964 continue;
1965 }
1966 if (!buffer_uptodate(bh) && !buffer_delay(bh) &&
1967 !buffer_unwritten(bh) &&
1968 (block_start < from || block_end > to)) {
1969 ll_rw_block(READ, 1, &bh);
1970 *wait_bh++=bh;
1971 }
1972 }
1973
1974
1975
1976 while(wait_bh > wait) {
1977 wait_on_buffer(*--wait_bh);
1978 if (!buffer_uptodate(*wait_bh))
1979 err = -EIO;
1980 }
1981 if (unlikely(err))
1982 page_zero_new_buffers(page, from, to);
1983 return err;
1984}
1985EXPORT_SYMBOL(__block_write_begin);
1986
1987static int __block_commit_write(struct inode *inode, struct page *page,
1988 unsigned from, unsigned to)
1989{
1990 unsigned block_start, block_end;
1991 int partial = 0;
1992 unsigned blocksize;
1993 struct buffer_head *bh, *head;
1994
1995 bh = head = page_buffers(page);
1996 blocksize = bh->b_size;
1997
1998 block_start = 0;
1999 do {
2000 block_end = block_start + blocksize;
2001 if (block_end <= from || block_start >= to) {
2002 if (!buffer_uptodate(bh))
2003 partial = 1;
2004 } else {
2005 set_buffer_uptodate(bh);
2006 mark_buffer_dirty(bh);
2007 }
2008 clear_buffer_new(bh);
2009
2010 block_start = block_end;
2011 bh = bh->b_this_page;
2012 } while (bh != head);
2013
2014
2015
2016
2017
2018
2019
2020 if (!partial)
2021 SetPageUptodate(page);
2022 return 0;
2023}
2024
2025
2026
2027
2028
2029
2030
2031int block_write_begin(struct address_space *mapping, loff_t pos, unsigned len,
2032 unsigned flags, struct page **pagep, get_block_t *get_block)
2033{
2034 pgoff_t index = pos >> PAGE_CACHE_SHIFT;
2035 struct page *page;
2036 int status;
2037
2038 page = grab_cache_page_write_begin(mapping, index, flags);
2039 if (!page)
2040 return -ENOMEM;
2041
2042 status = __block_write_begin(page, pos, len, get_block);
2043 if (unlikely(status)) {
2044 unlock_page(page);
2045 page_cache_release(page);
2046 page = NULL;
2047 }
2048
2049 *pagep = page;
2050 return status;
2051}
2052EXPORT_SYMBOL(block_write_begin);
2053
2054int block_write_end(struct file *file, struct address_space *mapping,
2055 loff_t pos, unsigned len, unsigned copied,
2056 struct page *page, void *fsdata)
2057{
2058 struct inode *inode = mapping->host;
2059 unsigned start;
2060
2061 start = pos & (PAGE_CACHE_SIZE - 1);
2062
2063 if (unlikely(copied < len)) {
2064
2065
2066
2067
2068
2069
2070
2071
2072
2073
2074
2075
2076 if (!PageUptodate(page))
2077 copied = 0;
2078
2079 page_zero_new_buffers(page, start+copied, start+len);
2080 }
2081 flush_dcache_page(page);
2082
2083
2084 __block_commit_write(inode, page, start, start+copied);
2085
2086 return copied;
2087}
2088EXPORT_SYMBOL(block_write_end);
2089
2090int generic_write_end(struct file *file, struct address_space *mapping,
2091 loff_t pos, unsigned len, unsigned copied,
2092 struct page *page, void *fsdata)
2093{
2094 struct inode *inode = mapping->host;
2095 loff_t old_size = inode->i_size;
2096 int i_size_changed = 0;
2097
2098 copied = block_write_end(file, mapping, pos, len, copied, page, fsdata);
2099
2100
2101
2102
2103
2104
2105
2106
2107 if (pos+copied > inode->i_size) {
2108 i_size_write(inode, pos+copied);
2109 i_size_changed = 1;
2110 }
2111
2112 unlock_page(page);
2113 page_cache_release(page);
2114
2115 if (old_size < pos)
2116 pagecache_isize_extended(inode, old_size, pos);
2117
2118
2119
2120
2121
2122
2123 if (i_size_changed)
2124 mark_inode_dirty(inode);
2125
2126 return copied;
2127}
2128EXPORT_SYMBOL(generic_write_end);
2129
2130
2131
2132
2133
2134
2135
2136
2137int block_is_partially_uptodate(struct page *page, unsigned long from,
2138 unsigned long count)
2139{
2140 unsigned block_start, block_end, blocksize;
2141 unsigned to;
2142 struct buffer_head *bh, *head;
2143 int ret = 1;
2144
2145 if (!page_has_buffers(page))
2146 return 0;
2147
2148 head = page_buffers(page);
2149 blocksize = head->b_size;
2150 to = min_t(unsigned, PAGE_CACHE_SIZE - from, count);
2151 to = from + to;
2152 if (from < blocksize && to > PAGE_CACHE_SIZE - blocksize)
2153 return 0;
2154
2155 bh = head;
2156 block_start = 0;
2157 do {
2158 block_end = block_start + blocksize;
2159 if (block_end > from && block_start < to) {
2160 if (!buffer_uptodate(bh)) {
2161 ret = 0;
2162 break;
2163 }
2164 if (block_end >= to)
2165 break;
2166 }
2167 block_start = block_end;
2168 bh = bh->b_this_page;
2169 } while (bh != head);
2170
2171 return ret;
2172}
2173EXPORT_SYMBOL(block_is_partially_uptodate);
2174
2175
2176
2177
2178
2179
2180
2181
2182int block_read_full_page(struct page *page, get_block_t *get_block)
2183{
2184 struct inode *inode = page->mapping->host;
2185 sector_t iblock, lblock;
2186 struct buffer_head *bh, *head, *arr[MAX_BUF_PER_PAGE];
2187 unsigned int blocksize, bbits;
2188 int nr, i;
2189 int fully_mapped = 1;
2190
2191 head = create_page_buffers(page, inode, 0);
2192 blocksize = head->b_size;
2193 bbits = block_size_bits(blocksize);
2194
2195 iblock = (sector_t)page->index << (PAGE_CACHE_SHIFT - bbits);
2196 lblock = (i_size_read(inode)+blocksize-1) >> bbits;
2197 bh = head;
2198 nr = 0;
2199 i = 0;
2200
2201 do {
2202 if (buffer_uptodate(bh))
2203 continue;
2204
2205 if (!buffer_mapped(bh)) {
2206 int err = 0;
2207
2208 fully_mapped = 0;
2209 if (iblock < lblock) {
2210 WARN_ON(bh->b_size != blocksize);
2211 err = get_block(inode, iblock, bh, 0);
2212 if (err)
2213 SetPageError(page);
2214 }
2215 if (!buffer_mapped(bh)) {
2216 zero_user(page, i * blocksize, blocksize);
2217 if (!err)
2218 set_buffer_uptodate(bh);
2219 continue;
2220 }
2221
2222
2223
2224
2225 if (buffer_uptodate(bh))
2226 continue;
2227 }
2228 arr[nr++] = bh;
2229 } while (i++, iblock++, (bh = bh->b_this_page) != head);
2230
2231 if (fully_mapped)
2232 SetPageMappedToDisk(page);
2233
2234 if (!nr) {
2235
2236
2237
2238
2239 if (!PageError(page))
2240 SetPageUptodate(page);
2241 unlock_page(page);
2242 return 0;
2243 }
2244
2245
2246 for (i = 0; i < nr; i++) {
2247 bh = arr[i];
2248 lock_buffer(bh);
2249 mark_buffer_async_read(bh);
2250 }
2251
2252
2253
2254
2255
2256
2257 for (i = 0; i < nr; i++) {
2258 bh = arr[i];
2259 if (buffer_uptodate(bh))
2260 end_buffer_async_read(bh, 1);
2261 else
2262 submit_bh(READ, bh);
2263 }
2264 return 0;
2265}
2266EXPORT_SYMBOL(block_read_full_page);
2267
2268
2269
2270
2271
2272int generic_cont_expand_simple(struct inode *inode, loff_t size)
2273{
2274 struct address_space *mapping = inode->i_mapping;
2275 struct page *page;
2276 void *fsdata;
2277 int err;
2278
2279 err = inode_newsize_ok(inode, size);
2280 if (err)
2281 goto out;
2282
2283 err = pagecache_write_begin(NULL, mapping, size, 0,
2284 AOP_FLAG_UNINTERRUPTIBLE|AOP_FLAG_CONT_EXPAND,
2285 &page, &fsdata);
2286 if (err)
2287 goto out;
2288
2289 err = pagecache_write_end(NULL, mapping, size, 0, 0, page, fsdata);
2290 BUG_ON(err > 0);
2291
2292out:
2293 return err;
2294}
2295EXPORT_SYMBOL(generic_cont_expand_simple);
2296
2297static int cont_expand_zero(struct file *file, struct address_space *mapping,
2298 loff_t pos, loff_t *bytes)
2299{
2300 struct inode *inode = mapping->host;
2301 unsigned blocksize = 1 << inode->i_blkbits;
2302 struct page *page;
2303 void *fsdata;
2304 pgoff_t index, curidx;
2305 loff_t curpos;
2306 unsigned zerofrom, offset, len;
2307 int err = 0;
2308
2309 index = pos >> PAGE_CACHE_SHIFT;
2310 offset = pos & ~PAGE_CACHE_MASK;
2311
2312 while (index > (curidx = (curpos = *bytes)>>PAGE_CACHE_SHIFT)) {
2313 zerofrom = curpos & ~PAGE_CACHE_MASK;
2314 if (zerofrom & (blocksize-1)) {
2315 *bytes |= (blocksize-1);
2316 (*bytes)++;
2317 }
2318 len = PAGE_CACHE_SIZE - zerofrom;
2319
2320 err = pagecache_write_begin(file, mapping, curpos, len,
2321 AOP_FLAG_UNINTERRUPTIBLE,
2322 &page, &fsdata);
2323 if (err)
2324 goto out;
2325 zero_user(page, zerofrom, len);
2326 err = pagecache_write_end(file, mapping, curpos, len, len,
2327 page, fsdata);
2328 if (err < 0)
2329 goto out;
2330 BUG_ON(err != len);
2331 err = 0;
2332
2333 balance_dirty_pages_ratelimited(mapping);
2334
2335 if (unlikely(fatal_signal_pending(current))) {
2336 err = -EINTR;
2337 goto out;
2338 }
2339 }
2340
2341
2342 if (index == curidx) {
2343 zerofrom = curpos & ~PAGE_CACHE_MASK;
2344
2345 if (offset <= zerofrom) {
2346 goto out;
2347 }
2348 if (zerofrom & (blocksize-1)) {
2349 *bytes |= (blocksize-1);
2350 (*bytes)++;
2351 }
2352 len = offset - zerofrom;
2353
2354 err = pagecache_write_begin(file, mapping, curpos, len,
2355 AOP_FLAG_UNINTERRUPTIBLE,
2356 &page, &fsdata);
2357 if (err)
2358 goto out;
2359 zero_user(page, zerofrom, len);
2360 err = pagecache_write_end(file, mapping, curpos, len, len,
2361 page, fsdata);
2362 if (err < 0)
2363 goto out;
2364 BUG_ON(err != len);
2365 err = 0;
2366 }
2367out:
2368 return err;
2369}
2370
2371
2372
2373
2374
2375int cont_write_begin(struct file *file, struct address_space *mapping,
2376 loff_t pos, unsigned len, unsigned flags,
2377 struct page **pagep, void **fsdata,
2378 get_block_t *get_block, loff_t *bytes)
2379{
2380 struct inode *inode = mapping->host;
2381 unsigned blocksize = 1 << inode->i_blkbits;
2382 unsigned zerofrom;
2383 int err;
2384
2385 err = cont_expand_zero(file, mapping, pos, bytes);
2386 if (err)
2387 return err;
2388
2389 zerofrom = *bytes & ~PAGE_CACHE_MASK;
2390 if (pos+len > *bytes && zerofrom & (blocksize-1)) {
2391 *bytes |= (blocksize-1);
2392 (*bytes)++;
2393 }
2394
2395 return block_write_begin(mapping, pos, len, flags, pagep, get_block);
2396}
2397EXPORT_SYMBOL(cont_write_begin);
2398
2399int block_commit_write(struct page *page, unsigned from, unsigned to)
2400{
2401 struct inode *inode = page->mapping->host;
2402 __block_commit_write(inode,page,from,to);
2403 return 0;
2404}
2405EXPORT_SYMBOL(block_commit_write);
2406
2407
2408
2409
2410
2411
2412
2413
2414
2415
2416
2417
2418
2419
2420
2421
2422
2423
2424
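/*
 * block_page_mkwrite() is a helper for filesystems' ->page_mkwrite
 * implementations: when a previously read-only page is about to become
 * writable, make sure its blocks are allocated (via get_block) and its
 * buffers are marked dirty.  The page is checked against i_size under the
 * page lock so a racing truncate is caught; on success the page is left
 * locked and stable for writeback.
 */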
2425int block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
2426 get_block_t get_block)
2427{
2428 struct page *page = vmf->page;
2429 struct inode *inode = file_inode(vma->vm_file);
2430 unsigned long end;
2431 loff_t size;
2432 int ret;
2433
2434 lock_page(page);
2435 size = i_size_read(inode);
2436 if ((page->mapping != inode->i_mapping) ||
2437 (page_offset(page) > size)) {
2438
2439 ret = -EFAULT;
2440 goto out_unlock;
2441 }
2442
2443
2444 if (((page->index + 1) << PAGE_CACHE_SHIFT) > size)
2445 end = size & ~PAGE_CACHE_MASK;
2446 else
2447 end = PAGE_CACHE_SIZE;
2448
2449 ret = __block_write_begin(page, 0, end, get_block);
2450 if (!ret)
2451 ret = block_commit_write(page, 0, end);
2452
2453 if (unlikely(ret < 0))
2454 goto out_unlock;
2455 set_page_dirty(page);
2456 wait_for_stable_page(page);
2457 return 0;
2458out_unlock:
2459 unlock_page(page);
2460 return ret;
2461}
2462EXPORT_SYMBOL(block_page_mkwrite);
2463
2464
2465
2466
2467
2468
2469static void end_buffer_read_nobh(struct buffer_head *bh, int uptodate)
2470{
2471 __end_buffer_read_notouch(bh, uptodate);
2472}
2473
2474
2475
2476
2477
2478
2479static void attach_nobh_buffers(struct page *page, struct buffer_head *head)
2480{
2481 struct buffer_head *bh;
2482
2483 BUG_ON(!PageLocked(page));
2484
2485 spin_lock(&page->mapping->private_lock);
2486 bh = head;
2487 do {
2488 if (PageDirty(page))
2489 set_buffer_dirty(bh);
2490 if (!bh->b_this_page)
2491 bh->b_this_page = head;
2492 bh = bh->b_this_page;
2493 } while (bh != head);
2494 attach_page_buffers(page, head);
2495 spin_unlock(&page->mapping->private_lock);
2496}
2497
2498
2499
2500
2501
2502
2503int nobh_write_begin(struct address_space *mapping,
2504 loff_t pos, unsigned len, unsigned flags,
2505 struct page **pagep, void **fsdata,
2506 get_block_t *get_block)
2507{
2508 struct inode *inode = mapping->host;
2509 const unsigned blkbits = inode->i_blkbits;
2510 const unsigned blocksize = 1 << blkbits;
2511 struct buffer_head *head, *bh;
2512 struct page *page;
2513 pgoff_t index;
2514 unsigned from, to;
2515 unsigned block_in_page;
2516 unsigned block_start, block_end;
2517 sector_t block_in_file;
2518 int nr_reads = 0;
2519 int ret = 0;
2520 int is_mapped_to_disk = 1;
2521
2522 index = pos >> PAGE_CACHE_SHIFT;
2523 from = pos & (PAGE_CACHE_SIZE - 1);
2524 to = from + len;
2525
2526 page = grab_cache_page_write_begin(mapping, index, flags);
2527 if (!page)
2528 return -ENOMEM;
2529 *pagep = page;
2530 *fsdata = NULL;
2531
2532 if (page_has_buffers(page)) {
2533 ret = __block_write_begin(page, pos, len, get_block);
2534 if (unlikely(ret))
2535 goto out_release;
2536 return ret;
2537 }
2538
2539 if (PageMappedToDisk(page))
2540 return 0;
2541
2542
2543
2544
2545
2546
2547
2548
2549
2550
2551 head = alloc_page_buffers(page, blocksize, 0);
2552 if (!head) {
2553 ret = -ENOMEM;
2554 goto out_release;
2555 }
2556
2557 block_in_file = (sector_t)page->index << (PAGE_CACHE_SHIFT - blkbits);
2558
2559
2560
2561
2562
2563
2564 for (block_start = 0, block_in_page = 0, bh = head;
2565 block_start < PAGE_CACHE_SIZE;
2566 block_in_page++, block_start += blocksize, bh = bh->b_this_page) {
2567 int create;
2568
2569 block_end = block_start + blocksize;
2570 bh->b_state = 0;
2571 create = 1;
2572 if (block_start >= to)
2573 create = 0;
2574 ret = get_block(inode, block_in_file + block_in_page,
2575 bh, create);
2576 if (ret)
2577 goto failed;
2578 if (!buffer_mapped(bh))
2579 is_mapped_to_disk = 0;
2580 if (buffer_new(bh))
2581 unmap_underlying_metadata(bh->b_bdev, bh->b_blocknr);
2582 if (PageUptodate(page)) {
2583 set_buffer_uptodate(bh);
2584 continue;
2585 }
2586 if (buffer_new(bh) || !buffer_mapped(bh)) {
2587 zero_user_segments(page, block_start, from,
2588 to, block_end);
2589 continue;
2590 }
2591 if (buffer_uptodate(bh))
2592 continue;
2593 if (block_start < from || block_end > to) {
2594 lock_buffer(bh);
2595 bh->b_end_io = end_buffer_read_nobh;
2596 submit_bh(READ, bh);
2597 nr_reads++;
2598 }
2599 }
2600
2601 if (nr_reads) {
2602
2603
2604
2605
2606
2607 for (bh = head; bh; bh = bh->b_this_page) {
2608 wait_on_buffer(bh);
2609 if (!buffer_uptodate(bh))
2610 ret = -EIO;
2611 }
2612 if (ret)
2613 goto failed;
2614 }
2615
2616 if (is_mapped_to_disk)
2617 SetPageMappedToDisk(page);
2618
2619 *fsdata = head;
2620
2621 return 0;
2622
2623failed:
2624 BUG_ON(!ret);
2625
2626
2627
2628
2629
2630
2631
2632 attach_nobh_buffers(page, head);
2633 page_zero_new_buffers(page, from, to);
2634
2635out_release:
2636 unlock_page(page);
2637 page_cache_release(page);
2638 *pagep = NULL;
2639
2640 return ret;
2641}
2642EXPORT_SYMBOL(nobh_write_begin);
2643
2644int nobh_write_end(struct file *file, struct address_space *mapping,
2645 loff_t pos, unsigned len, unsigned copied,
2646 struct page *page, void *fsdata)
2647{
2648 struct inode *inode = page->mapping->host;
2649 struct buffer_head *head = fsdata;
2650 struct buffer_head *bh;
2651 BUG_ON(fsdata != NULL && page_has_buffers(page));
2652
2653 if (unlikely(copied < len) && head)
2654 attach_nobh_buffers(page, head);
2655 if (page_has_buffers(page))
2656 return generic_write_end(file, mapping, pos, len,
2657 copied, page, fsdata);
2658
2659 SetPageUptodate(page);
2660 set_page_dirty(page);
2661 if (pos+copied > inode->i_size) {
2662 i_size_write(inode, pos+copied);
2663 mark_inode_dirty(inode);
2664 }
2665
2666 unlock_page(page);
2667 page_cache_release(page);
2668
2669 while (head) {
2670 bh = head;
2671 head = head->b_this_page;
2672 free_buffer_head(bh);
2673 }
2674
2675 return copied;
2676}
2677EXPORT_SYMBOL(nobh_write_end);
2678
2679
2680
2681
2682
2683
2684int nobh_writepage(struct page *page, get_block_t *get_block,
2685 struct writeback_control *wbc)
2686{
2687 struct inode * const inode = page->mapping->host;
2688 loff_t i_size = i_size_read(inode);
2689 const pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT;
2690 unsigned offset;
2691 int ret;
2692
2693
2694 if (page->index < end_index)
2695 goto out;
2696
2697
2698 offset = i_size & (PAGE_CACHE_SIZE-1);
2699 if (page->index >= end_index+1 || !offset) {
2700
2701
2702
2703
2704
2705#if 0
2706
2707 if (page->mapping->a_ops->invalidatepage)
2708 page->mapping->a_ops->invalidatepage(page, offset);
2709#endif
2710 unlock_page(page);
2711 return 0;
2712 }
2713
2714
2715
2716
2717
2718
2719
2720
2721 zero_user_segment(page, offset, PAGE_CACHE_SIZE);
2722out:
2723 ret = mpage_writepage(page, get_block, wbc);
2724 if (ret == -EAGAIN)
2725 ret = __block_write_full_page(inode, page, get_block, wbc,
2726 end_buffer_async_write);
2727 return ret;
2728}
2729EXPORT_SYMBOL(nobh_writepage);
2730
2731int nobh_truncate_page(struct address_space *mapping,
2732 loff_t from, get_block_t *get_block)
2733{
2734 pgoff_t index = from >> PAGE_CACHE_SHIFT;
2735 unsigned offset = from & (PAGE_CACHE_SIZE-1);
2736 unsigned blocksize;
2737 sector_t iblock;
2738 unsigned length, pos;
2739 struct inode *inode = mapping->host;
2740 struct page *page;
2741 struct buffer_head map_bh;
2742 int err;
2743
2744 blocksize = 1 << inode->i_blkbits;
2745 length = offset & (blocksize - 1);
2746
2747
2748 if (!length)
2749 return 0;
2750
2751 length = blocksize - length;
2752 iblock = (sector_t)index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
2753
2754 page = grab_cache_page(mapping, index);
2755 err = -ENOMEM;
2756 if (!page)
2757 goto out;
2758
2759 if (page_has_buffers(page)) {
2760has_buffers:
2761 unlock_page(page);
2762 page_cache_release(page);
2763 return block_truncate_page(mapping, from, get_block);
2764 }
2765
2766
2767 pos = blocksize;
2768 while (offset >= pos) {
2769 iblock++;
2770 pos += blocksize;
2771 }
2772
2773 map_bh.b_size = blocksize;
2774 map_bh.b_state = 0;
2775 err = get_block(inode, iblock, &map_bh, 0);
2776 if (err)
2777 goto unlock;
2778
2779 if (!buffer_mapped(&map_bh))
2780 goto unlock;
2781
2782
2783 if (!PageUptodate(page)) {
2784 err = mapping->a_ops->readpage(NULL, page);
2785 if (err) {
2786 page_cache_release(page);
2787 goto out;
2788 }
2789 lock_page(page);
2790 if (!PageUptodate(page)) {
2791 err = -EIO;
2792 goto unlock;
2793 }
2794 if (page_has_buffers(page))
2795 goto has_buffers;
2796 }
2797 zero_user(page, offset, length);
2798 set_page_dirty(page);
2799 err = 0;
2800
2801unlock:
2802 unlock_page(page);
2803 page_cache_release(page);
2804out:
2805 return err;
2806}
2807EXPORT_SYMBOL(nobh_truncate_page);
2808
2809int block_truncate_page(struct address_space *mapping,
2810 loff_t from, get_block_t *get_block)
2811{
2812 pgoff_t index = from >> PAGE_CACHE_SHIFT;
2813 unsigned offset = from & (PAGE_CACHE_SIZE-1);
2814 unsigned blocksize;
2815 sector_t iblock;
2816 unsigned length, pos;
2817 struct inode *inode = mapping->host;
2818 struct page *page;
2819 struct buffer_head *bh;
2820 int err;
2821
2822 blocksize = 1 << inode->i_blkbits;
2823 length = offset & (blocksize - 1);
2824
2825
2826 if (!length)
2827 return 0;
2828
2829 length = blocksize - length;
2830 iblock = (sector_t)index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
2831
2832 page = grab_cache_page(mapping, index);
2833 err = -ENOMEM;
2834 if (!page)
2835 goto out;
2836
2837 if (!page_has_buffers(page))
2838 create_empty_buffers(page, blocksize, 0);
2839
2840
2841 bh = page_buffers(page);
2842 pos = blocksize;
2843 while (offset >= pos) {
2844 bh = bh->b_this_page;
2845 iblock++;
2846 pos += blocksize;
2847 }
2848
2849 err = 0;
2850 if (!buffer_mapped(bh)) {
2851 WARN_ON(bh->b_size != blocksize);
2852 err = get_block(inode, iblock, bh, 0);
2853 if (err)
2854 goto unlock;
2855
2856 if (!buffer_mapped(bh))
2857 goto unlock;
2858 }
2859
2860
2861 if (PageUptodate(page))
2862 set_buffer_uptodate(bh);
2863
2864 if (!buffer_uptodate(bh) && !buffer_delay(bh) && !buffer_unwritten(bh)) {
2865 err = -EIO;
2866 ll_rw_block(READ, 1, &bh);
2867 wait_on_buffer(bh);
2868
2869 if (!buffer_uptodate(bh))
2870 goto unlock;
2871 }
2872
2873 zero_user(page, offset, length);
2874 mark_buffer_dirty(bh);
2875 err = 0;
2876
2877unlock:
2878 unlock_page(page);
2879 page_cache_release(page);
2880out:
2881 return err;
2882}
2883EXPORT_SYMBOL(block_truncate_page);
2884
2885
2886
2887
int block_write_full_page(struct page *page, get_block_t *get_block,
			struct writeback_control *wbc)
{
	struct inode * const inode = page->mapping->host;
	loff_t i_size = i_size_read(inode);
	const pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT;
	unsigned offset;

	/* Is the page fully inside i_size? */
	if (page->index < end_index)
		return __block_write_full_page(inode, page, get_block, wbc,
					       end_buffer_async_write);

	/* Is the page fully outside i_size? (truncate in progress) */
	offset = i_size & (PAGE_CACHE_SIZE-1);
	if (page->index >= end_index+1 || !offset) {
		/*
		 * The page may have dirty, unmapped buffers.  For example,
		 * they may have been added in ext3_writepage().  Make them
		 * freeable here, so the page does not leak.
		 */
		do_invalidatepage(page, 0, PAGE_CACHE_SIZE);
		unlock_page(page);
		return 0; /* don't care */
	}

	/*
	 * The page straddles i_size.  It must be zeroed out on each and every
	 * writepage invocation because it may be mmapped: the tail of a
	 * mapping beyond EOF is zero-filled when the file is mapped, and
	 * writes to that region are not supposed to be written out to the
	 * file, so clear it before writing the page.
	 */
	zero_user_segment(page, offset, PAGE_CACHE_SIZE);
	return __block_write_full_page(inode, page, get_block, wbc,
				       end_buffer_async_write);
}
EXPORT_SYMBOL(block_write_full_page);

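/*
 * generic_block_bmap() translates a file-relative block number into a block
 * number on the underlying device by probing get_block() with a temporary
 * buffer_head.  It is the usual ->bmap implementation behind the FIBMAP
 * ioctl.
 */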
sector_t generic_block_bmap(struct address_space *mapping, sector_t block,
			    get_block_t *get_block)
{
	struct buffer_head tmp;
	struct inode *inode = mapping->host;
	tmp.b_state = 0;
	tmp.b_blocknr = 0;
	tmp.b_size = 1 << inode->i_blkbits;
	get_block(inode, block, &tmp, 0);
	return tmp.b_blocknr;
}
EXPORT_SYMBOL(generic_block_bmap);

static void end_bio_bh_io_sync(struct bio *bio)
{
	struct buffer_head *bh = bio->bi_private;

	if (unlikely(bio_flagged(bio, BIO_QUIET)))
		set_bit(BH_Quiet, &bh->b_state);

	bh->b_end_io(bh, !bio->bi_error);
	bio_put(bio);
}

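/*
 * guard_bio_eod() truncates a bio that runs past the end of the underlying
 * block device.  This lets buffered I/O reach the odd last sectors of a
 * device whose size is not a multiple of the block size: the bio and its
 * final bio_vec are shortened to the device size, and for reads the
 * truncated tail of the buffer is zero-filled so no stale data is exposed.
 */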
void guard_bio_eod(int rw, struct bio *bio)
{
	sector_t maxsector;
	struct bio_vec *bvec = &bio->bi_io_vec[bio->bi_vcnt - 1];
	unsigned truncated_bytes;

	maxsector = i_size_read(bio->bi_bdev->bd_inode) >> 9;
	if (!maxsector)
		return;

	/*
	 * If the *whole* IO is past the end of the device,
	 * let it through, and the IO layer will turn it into
	 * an EIO.
	 */
	if (unlikely(bio->bi_iter.bi_sector >= maxsector))
		return;

	maxsector -= bio->bi_iter.bi_sector;
	if (likely((bio->bi_iter.bi_size >> 9) <= maxsector))
		return;

	/* Uhhuh. We've got a bio that straddles the device size! */
	truncated_bytes = bio->bi_iter.bi_size - (maxsector << 9);

	/* Truncate the bio.. */
	bio->bi_iter.bi_size -= truncated_bytes;
	bvec->bv_len -= truncated_bytes;

	/* ..and clear the end of the buffer for reads */
	if ((rw & RW_MASK) == READ) {
		zero_user(bvec->bv_page, bvec->bv_offset + bvec->bv_len,
			  truncated_bytes);
	}
}

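/*
 * submit_bh_wbc() wraps a single buffer_head in a freshly allocated bio and
 * submits it.  The buffer must be locked, mapped and have an end_io handler;
 * the optional writeback_control is used for cgroup writeback accounting.
 */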
static int submit_bh_wbc(int rw, struct buffer_head *bh,
			 unsigned long bio_flags, struct writeback_control *wbc)
{
	struct bio *bio;

	BUG_ON(!buffer_locked(bh));
	BUG_ON(!buffer_mapped(bh));
	BUG_ON(!bh->b_end_io);
	BUG_ON(buffer_delay(bh));
	BUG_ON(buffer_unwritten(bh));

	/*
	 * Only clear out a write error when a write request is being queued.
	 */
	if (test_set_buffer_req(bh) && (rw & WRITE))
		clear_buffer_write_io_error(bh);

	/*
	 * From here on down, it's all bio -- do the initial mapping;
	 * submit_bio -> generic_make_request may further map this bio around.
	 */
	bio = bio_alloc(GFP_NOIO, 1);

	if (wbc) {
		wbc_init_bio(wbc, bio);
		wbc_account_io(wbc, bh->b_page, bh->b_size);
	}

	bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9);
	bio->bi_bdev = bh->b_bdev;

	bio_add_page(bio, bh->b_page, bh->b_size, bh_offset(bh));
	BUG_ON(bio->bi_iter.bi_size != bh->b_size);

	bio->bi_end_io = end_bio_bh_io_sync;
	bio->bi_private = bh;
	bio->bi_flags |= bio_flags;

	/* Take care of bh's that straddle the end of the device */
	guard_bio_eod(rw, bio);

	if (buffer_meta(bh))
		rw |= REQ_META;
	if (buffer_prio(bh))
		rw |= REQ_PRIO;

	submit_bio(rw, bio);
	return 0;
}

int _submit_bh(int rw, struct buffer_head *bh, unsigned long bio_flags)
{
	return submit_bh_wbc(rw, bh, bio_flags, NULL);
}
EXPORT_SYMBOL_GPL(_submit_bh);

int submit_bh(int rw, struct buffer_head *bh)
{
	return submit_bh_wbc(rw, bh, 0, NULL);
}
EXPORT_SYMBOL(submit_bh);

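/*
 * ll_rw_block() starts I/O on an array of buffer_heads.  For WRITE it
 * submits only buffers that are currently dirty; otherwise it submits only
 * buffers that are not already up to date.  Buffers that cannot be locked
 * without blocking, or that need no I/O, are skipped.  The caller must wait
 * on the buffers (e.g. with wait_on_buffer()) to learn the outcome;
 * completion is handled by end_buffer_read_sync()/end_buffer_write_sync().
 */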
void ll_rw_block(int rw, int nr, struct buffer_head *bhs[])
{
	int i;

	for (i = 0; i < nr; i++) {
		struct buffer_head *bh = bhs[i];

		if (!trylock_buffer(bh))
			continue;
		if (rw == WRITE) {
			if (test_clear_buffer_dirty(bh)) {
				bh->b_end_io = end_buffer_write_sync;
				get_bh(bh);
				submit_bh(WRITE, bh);
				continue;
			}
		} else {
			if (!buffer_uptodate(bh)) {
				bh->b_end_io = end_buffer_read_sync;
				get_bh(bh);
				submit_bh(rw, bh);
				continue;
			}
		}
		unlock_buffer(bh);
	}
}
EXPORT_SYMBOL(ll_rw_block);

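/*
 * write_dirty_buffer() submits a buffer for write-out if it is dirty,
 * clearing the dirty bit first.  It does not wait for completion; the
 * buffer is unlocked by end_buffer_write_sync() when the I/O finishes.
 */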
void write_dirty_buffer(struct buffer_head *bh, int rw)
{
	lock_buffer(bh);
	if (!test_clear_buffer_dirty(bh)) {
		unlock_buffer(bh);
		return;
	}
	bh->b_end_io = end_buffer_write_sync;
	get_bh(bh);
	submit_bh(rw, bh);
}
EXPORT_SYMBOL(write_dirty_buffer);

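/*
 * For a data-integrity writeout we need to write the buffer synchronously:
 * __sync_dirty_buffer() submits the buffer with the given rw flags and
 * waits for it, returning -EIO if the write failed.  The caller must hold a
 * reference on the buffer_head.
 */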
int __sync_dirty_buffer(struct buffer_head *bh, int rw)
{
	int ret = 0;

	WARN_ON(atomic_read(&bh->b_count) < 1);
	lock_buffer(bh);
	if (test_clear_buffer_dirty(bh)) {
		get_bh(bh);
		bh->b_end_io = end_buffer_write_sync;
		ret = submit_bh(rw, bh);
		wait_on_buffer(bh);
		if (!ret && !buffer_uptodate(bh))
			ret = -EIO;
	} else {
		unlock_buffer(bh);
	}
	return ret;
}
EXPORT_SYMBOL(__sync_dirty_buffer);

int sync_dirty_buffer(struct buffer_head *bh)
{
	return __sync_dirty_buffer(bh, WRITE_SYNC);
}
EXPORT_SYMBOL(sync_dirty_buffer);

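/*
 * buffer_busy() tells whether a buffer can be released: it cannot while it
 * still has users (elevated b_count) or is dirty or locked.
 */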
static inline int buffer_busy(struct buffer_head *bh)
{
	return atomic_read(&bh->b_count) |
		(bh->b_state & ((1 << BH_Dirty) | (1 << BH_Lock)));
}

static int
drop_buffers(struct page *page, struct buffer_head **buffers_to_free)
{
	struct buffer_head *head = page_buffers(page);
	struct buffer_head *bh;

	bh = head;
	do {
		if (buffer_write_io_error(bh) && page->mapping)
			set_bit(AS_EIO, &page->mapping->flags);
		if (buffer_busy(bh))
			goto failed;
		bh = bh->b_this_page;
	} while (bh != head);

	do {
		struct buffer_head *next = bh->b_this_page;

		if (bh->b_assoc_map)
			__remove_assoc_queue(bh);
		bh = next;
	} while (bh != head);
	*buffers_to_free = head;
	__clear_page_buffers(page);
	return 1;
failed:
	return 0;
}

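/*
 * try_to_free_buffers() is called with the page locked.  If every buffer on
 * the page is clean and unused, the buffers are detached (under the
 * mapping's private_lock when a mapping exists) and freed, and the page's
 * dirty state is cancelled so the now buffer-less page is not written back.
 */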
int try_to_free_buffers(struct page *page)
{
	struct address_space * const mapping = page->mapping;
	struct buffer_head *buffers_to_free = NULL;
	int ret = 0;

	BUG_ON(!PageLocked(page));
	if (PageWriteback(page))
		return 0;

	if (mapping == NULL) {
		ret = drop_buffers(page, &buffers_to_free);
		goto out;
	}

	spin_lock(&mapping->private_lock);
	ret = drop_buffers(page, &buffers_to_free);

	/*
	 * If the filesystem writes its buffers by hand (eg ext3) then we can
	 * have clean buffers against a dirty page.  We clean the page here;
	 * otherwise the VM will never notice that the filesystem did any IO
	 * at all.
	 *
	 * Also, during truncate, discard_buffer will have marked all the
	 * page's buffers clean.  We discover that here and clean the page
	 * also.
	 *
	 * private_lock must be held over this entire operation in order to
	 * synchronise against __set_page_dirty_buffers and prevent the dirty
	 * bit from being lost.
	 */
	if (ret)
		cancel_dirty_page(page);
	spin_unlock(&mapping->private_lock);
out:
	if (buffers_to_free) {
		struct buffer_head *bh = buffers_to_free;

		do {
			struct buffer_head *next = bh->b_this_page;
			free_buffer_head(bh);
			bh = next;
		} while (bh != buffers_to_free);
	}
	return ret;
}
EXPORT_SYMBOL(try_to_free_buffers);

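/*
 * There are no bdflush tunables left.  But distributions are still running
 * obsolete flush daemons, so this stub of the bdflush() system call is kept
 * so that such daemons exit cleanly (func == 1) instead of failing.
 */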
SYSCALL_DEFINE2(bdflush, int, func, long, data)
{
	static int msg_count;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	if (msg_count < 5) {
		msg_count++;
		printk(KERN_INFO
			"warning: process `%s' used the obsolete bdflush"
			" system call\n", current->comm);
		printk(KERN_INFO "Fix your initscripts?\n");
	}

	if (func == 1)
		do_exit(0);
	return 0;
}

/*
 * Buffer-head allocation
 */
static struct kmem_cache *bh_cachep __read_mostly;

/*
 * Once the number of bh's in the machine exceeds this level, we start
 * stripping them in writeback.
 */
static unsigned long max_buffer_heads;

int buffer_heads_over_limit;

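/*
 * Per-CPU accounting of live buffer_heads.  recalc_bh_state() folds the
 * per-CPU counts into buffer_heads_over_limit, but only once every 4096
 * allocations/frees on a given CPU, to keep the summation cheap.
 */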
struct bh_accounting {
	int nr;
	int ratelimit;
};

static DEFINE_PER_CPU(struct bh_accounting, bh_accounting) = {0, 0};

static void recalc_bh_state(void)
{
	int i;
	int tot = 0;

	if (__this_cpu_inc_return(bh_accounting.ratelimit) - 1 < 4096)
		return;
	__this_cpu_write(bh_accounting.ratelimit, 0);
	for_each_online_cpu(i)
		tot += per_cpu(bh_accounting, i).nr;
	buffer_heads_over_limit = (tot > max_buffer_heads);
}

struct buffer_head *alloc_buffer_head(gfp_t gfp_flags)
{
	struct buffer_head *ret = kmem_cache_zalloc(bh_cachep, gfp_flags);
	if (ret) {
		INIT_LIST_HEAD(&ret->b_assoc_buffers);
		preempt_disable();
		__this_cpu_inc(bh_accounting.nr);
		recalc_bh_state();
		preempt_enable();
	}
	return ret;
}
EXPORT_SYMBOL(alloc_buffer_head);

void free_buffer_head(struct buffer_head *bh)
{
	BUG_ON(!list_empty(&bh->b_assoc_buffers));
	kmem_cache_free(bh_cachep, bh);
	preempt_disable();
	__this_cpu_dec(bh_accounting.nr);
	recalc_bh_state();
	preempt_enable();
}
EXPORT_SYMBOL(free_buffer_head);

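/*
 * When a CPU goes offline, its per-CPU buffer_head LRU is drained and its
 * share of the buffer_head accounting is folded into the current CPU.
 */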
static void buffer_exit_cpu(int cpu)
{
	int i;
	struct bh_lru *b = &per_cpu(bh_lrus, cpu);

	for (i = 0; i < BH_LRU_SIZE; i++) {
		brelse(b->bhs[i]);
		b->bhs[i] = NULL;
	}
	this_cpu_add(bh_accounting.nr, per_cpu(bh_accounting, cpu).nr);
	per_cpu(bh_accounting, cpu).nr = 0;
}

static int buffer_cpu_notify(struct notifier_block *self,
			     unsigned long action, void *hcpu)
{
	if (action == CPU_DEAD || action == CPU_DEAD_FROZEN)
		buffer_exit_cpu((unsigned long)hcpu);
	return NOTIFY_OK;
}

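/**
 * bh_uptodate_or_lock - Test whether the buffer is uptodate
 * @bh: struct buffer_head
 *
 * Return true if the buffer is up-to-date and false,
 * with the buffer locked, if not.
 */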
int bh_uptodate_or_lock(struct buffer_head *bh)
{
	if (!buffer_uptodate(bh)) {
		lock_buffer(bh);
		if (!buffer_uptodate(bh))
			return 0;
		unlock_buffer(bh);
	}
	return 1;
}
EXPORT_SYMBOL(bh_uptodate_or_lock);

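/**
 * bh_submit_read - Submit a locked buffer for reading
 * @bh: struct buffer_head
 *
 * Returns zero on success and -EIO on error.  The buffer must be locked on
 * entry; it is unlocked by the I/O completion handler.
 */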
int bh_submit_read(struct buffer_head *bh)
{
	BUG_ON(!buffer_locked(bh));

	if (buffer_uptodate(bh)) {
		unlock_buffer(bh);
		return 0;
	}

	get_bh(bh);
	bh->b_end_io = end_buffer_read_sync;
	submit_bh(READ, bh);
	wait_on_buffer(bh);
	if (buffer_uptodate(bh))
		return 0;
	return -EIO;
}
EXPORT_SYMBOL(bh_submit_read);

void __init buffer_init(void)
{
	unsigned long nrpages;

	bh_cachep = kmem_cache_create("buffer_head",
			sizeof(struct buffer_head), 0,
				(SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|
				SLAB_MEM_SPREAD),
				NULL);

	/*
	 * Limit the bh occupancy to 10% of ZONE_NORMAL
	 */
	nrpages = (nr_free_buffer_pages() * 10) / 100;
	max_buffer_heads = nrpages * (PAGE_SIZE / sizeof(struct buffer_head));
	hotcpu_notifier(buffer_cpu_notify, 0);
}