1
2
3
4
5
6
7
8#include <linux/init.h>
9#include <linux/mm.h>
10#include <linux/fcntl.h>
11#include <linux/slab.h>
12#include <linux/kmod.h>
13#include <linux/major.h>
14#include <linux/device_cgroup.h>
15#include <linux/highmem.h>
16#include <linux/blkdev.h>
17#include <linux/module.h>
18#include <linux/blkpg.h>
19#include <linux/magic.h>
20#include <linux/buffer_head.h>
21#include <linux/swap.h>
22#include <linux/pagevec.h>
23#include <linux/writeback.h>
24#include <linux/mpage.h>
25#include <linux/mount.h>
26#include <linux/uio.h>
27#include <linux/namei.h>
28#include <linux/log2.h>
29#include <linux/cleancache.h>
30#include <linux/aio.h>
31#include <asm/uaccess.h>
32#include "internal.h"
33
34struct bdev_inode {
35 struct block_device bdev;
36 struct inode vfs_inode;
37};
38
39static const struct address_space_operations def_blk_aops;
40
41static inline struct bdev_inode *BDEV_I(struct inode *inode)
42{
43 return container_of(inode, struct bdev_inode, vfs_inode);
44}
45
46inline struct block_device *I_BDEV(struct inode *inode)
47{
48 return &BDEV_I(inode)->bdev;
49}
50EXPORT_SYMBOL(I_BDEV);
51
52
53
54
55
56
57static void bdev_inode_switch_bdi(struct inode *inode,
58 struct backing_dev_info *dst)
59{
60 struct backing_dev_info *old = inode->i_data.backing_dev_info;
61
62 if (unlikely(dst == old))
63 return;
64 bdi_lock_two(&old->wb, &dst->wb);
65 spin_lock(&inode->i_lock);
66 inode->i_data.backing_dev_info = dst;
67 if (inode->i_state & I_DIRTY)
68 list_move(&inode->i_wb_list, &dst->wb.b_dirty);
69 spin_unlock(&inode->i_lock);
70 spin_unlock(&old->wb.list_lock);
71 spin_unlock(&dst->wb.list_lock);
72}
73
74
75void kill_bdev(struct block_device *bdev)
76{
77 struct address_space *mapping = bdev->bd_inode->i_mapping;
78
79 if (mapping->nrpages == 0)
80 return;
81
82 invalidate_bh_lrus();
83 truncate_inode_pages(mapping, 0);
84}
85EXPORT_SYMBOL(kill_bdev);
86
87
88void invalidate_bdev(struct block_device *bdev)
89{
90 struct address_space *mapping = bdev->bd_inode->i_mapping;
91
92 if (mapping->nrpages == 0)
93 return;
94
95 invalidate_bh_lrus();
96 lru_add_drain_all();
97 invalidate_mapping_pages(mapping, 0, -1);
98
99
100
101 cleancache_invalidate_inode(mapping);
102}
103EXPORT_SYMBOL(invalidate_bdev);
104
105int set_blocksize(struct block_device *bdev, int size)
106{
107
108 if (size > PAGE_SIZE || size < 512 || !is_power_of_2(size))
109 return -EINVAL;
110
111
112 if (size < bdev_logical_block_size(bdev))
113 return -EINVAL;
114
115
116 if (bdev->bd_block_size != size) {
117 sync_blockdev(bdev);
118 bdev->bd_block_size = size;
119 bdev->bd_inode->i_blkbits = blksize_bits(size);
120 kill_bdev(bdev);
121 }
122 return 0;
123}
124
125EXPORT_SYMBOL(set_blocksize);
126
127int sb_set_blocksize(struct super_block *sb, int size)
128{
129 if (set_blocksize(sb->s_bdev, size))
130 return 0;
131
132
133 sb->s_blocksize = size;
134 sb->s_blocksize_bits = blksize_bits(size);
135 return sb->s_blocksize;
136}
137
138EXPORT_SYMBOL(sb_set_blocksize);
139
140int sb_min_blocksize(struct super_block *sb, int size)
141{
142 int minsize = bdev_logical_block_size(sb->s_bdev);
143 if (size < minsize)
144 size = minsize;
145 return sb_set_blocksize(sb, size);
146}
147
148EXPORT_SYMBOL(sb_min_blocksize);
149
150static int
151blkdev_get_block(struct inode *inode, sector_t iblock,
152 struct buffer_head *bh, int create)
153{
154 bh->b_bdev = I_BDEV(inode);
155 bh->b_blocknr = iblock;
156 set_buffer_mapped(bh);
157 return 0;
158}
159
160static ssize_t
161blkdev_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
162 loff_t offset, unsigned long nr_segs)
163{
164 struct file *file = iocb->ki_filp;
165 struct inode *inode = file->f_mapping->host;
166
167 return __blockdev_direct_IO(rw, iocb, inode, I_BDEV(inode), iov, offset,
168 nr_segs, blkdev_get_block, NULL, NULL, 0);
169}
170
171int __sync_blockdev(struct block_device *bdev, int wait)
172{
173 if (!bdev)
174 return 0;
175 if (!wait)
176 return filemap_flush(bdev->bd_inode->i_mapping);
177 return filemap_write_and_wait(bdev->bd_inode->i_mapping);
178}
179
180
181
182
183
/* Write back @bdev's page cache and wait (the @wait = 1 form of __sync_blockdev()). */
int sync_blockdev(struct block_device *bdev)
{
	int err = __sync_blockdev(bdev, 1);

	return err;
}
EXPORT_SYMBOL(sync_blockdev);
189
190
191
192
193
194
/*
 * Sync whatever sits on @bdev: if get_super() finds a superblock, that
 * filesystem is synced; otherwise only the device's own page cache is
 * written back.  Returns the result of whichever sync was performed.
 */
int fsync_bdev(struct block_device *bdev)
{
	struct super_block *sb = get_super(bdev);
	int res;

	if (!sb)
		return sync_blockdev(bdev);

	res = sync_filesystem(sb);
	drop_super(sb);
	return res;
}
EXPORT_SYMBOL(fsync_bdev);
206
207
208
209
210
211
212
213
214
215
216
217
218
219struct super_block *freeze_bdev(struct block_device *bdev)
220{
221 struct super_block *sb;
222 int error = 0;
223
224 mutex_lock(&bdev->bd_fsfreeze_mutex);
225 if (++bdev->bd_fsfreeze_count > 1) {
226
227
228
229
230
231 sb = get_super(bdev);
232 drop_super(sb);
233 mutex_unlock(&bdev->bd_fsfreeze_mutex);
234 return sb;
235 }
236
237 sb = get_active_super(bdev);
238 if (!sb)
239 goto out;
240 error = freeze_super(sb);
241 if (error) {
242 deactivate_super(sb);
243 bdev->bd_fsfreeze_count--;
244 mutex_unlock(&bdev->bd_fsfreeze_mutex);
245 return ERR_PTR(error);
246 }
247 deactivate_super(sb);
248 out:
249 sync_blockdev(bdev);
250 mutex_unlock(&bdev->bd_fsfreeze_mutex);
251 return sb;
252}
253EXPORT_SYMBOL(freeze_bdev);
254
255
256
257
258
259
260
261
262int thaw_bdev(struct block_device *bdev, struct super_block *sb)
263{
264 int error = -EINVAL;
265
266 mutex_lock(&bdev->bd_fsfreeze_mutex);
267 if (!bdev->bd_fsfreeze_count)
268 goto out;
269
270 error = 0;
271 if (--bdev->bd_fsfreeze_count > 0)
272 goto out;
273
274 if (!sb)
275 goto out;
276
277 error = thaw_super(sb);
278 if (error) {
279 bdev->bd_fsfreeze_count++;
280 mutex_unlock(&bdev->bd_fsfreeze_mutex);
281 return error;
282 }
283out:
284 mutex_unlock(&bdev->bd_fsfreeze_mutex);
285 return 0;
286}
287EXPORT_SYMBOL(thaw_bdev);
288
289static int blkdev_writepage(struct page *page, struct writeback_control *wbc)
290{
291 return block_write_full_page(page, blkdev_get_block, wbc);
292}
293
294static int blkdev_readpage(struct file * file, struct page * page)
295{
296 return block_read_full_page(page, blkdev_get_block);
297}
298
299static int blkdev_write_begin(struct file *file, struct address_space *mapping,
300 loff_t pos, unsigned len, unsigned flags,
301 struct page **pagep, void **fsdata)
302{
303 return block_write_begin(mapping, pos, len, flags, pagep,
304 blkdev_get_block);
305}
306
307static int blkdev_write_end(struct file *file, struct address_space *mapping,
308 loff_t pos, unsigned len, unsigned copied,
309 struct page *page, void *fsdata)
310{
311 int ret;
312 ret = block_write_end(file, mapping, pos, len, copied, page, fsdata);
313
314 unlock_page(page);
315 page_cache_release(page);
316
317 return ret;
318}
319
320
321
322
323
324
325static loff_t block_llseek(struct file *file, loff_t offset, int whence)
326{
327 struct inode *bd_inode = file->f_mapping->host;
328 loff_t size;
329 loff_t retval;
330
331 mutex_lock(&bd_inode->i_mutex);
332 size = i_size_read(bd_inode);
333
334 retval = -EINVAL;
335 switch (whence) {
336 case SEEK_END:
337 offset += size;
338 break;
339 case SEEK_CUR:
340 offset += file->f_pos;
341 case SEEK_SET:
342 break;
343 default:
344 goto out;
345 }
346 if (offset >= 0 && offset <= size) {
347 if (offset != file->f_pos) {
348 file->f_pos = offset;
349 }
350 retval = offset;
351 }
352out:
353 mutex_unlock(&bd_inode->i_mutex);
354 return retval;
355}
356
357int blkdev_fsync(struct file *filp, loff_t start, loff_t end, int datasync)
358{
359 struct inode *bd_inode = filp->f_mapping->host;
360 struct block_device *bdev = I_BDEV(bd_inode);
361 int error;
362
363 error = filemap_write_and_wait_range(filp->f_mapping, start, end);
364 if (error)
365 return error;
366
367
368
369
370
371
372 error = blkdev_issue_flush(bdev, GFP_KERNEL, NULL);
373 if (error == -EOPNOTSUPP)
374 error = 0;
375
376 return error;
377}
378EXPORT_SYMBOL(blkdev_fsync);
379
380
381
382
383
384static __cacheline_aligned_in_smp DEFINE_SPINLOCK(bdev_lock);
385static struct kmem_cache * bdev_cachep __read_mostly;
386
387static struct inode *bdev_alloc_inode(struct super_block *sb)
388{
389 struct bdev_inode *ei = kmem_cache_alloc(bdev_cachep, GFP_KERNEL);
390 if (!ei)
391 return NULL;
392 return &ei->vfs_inode;
393}
394
395static void bdev_i_callback(struct rcu_head *head)
396{
397 struct inode *inode = container_of(head, struct inode, i_rcu);
398 struct bdev_inode *bdi = BDEV_I(inode);
399
400 kmem_cache_free(bdev_cachep, bdi);
401}
402
403static void bdev_destroy_inode(struct inode *inode)
404{
405 call_rcu(&inode->i_rcu, bdev_i_callback);
406}
407
408static void init_once(void *foo)
409{
410 struct bdev_inode *ei = (struct bdev_inode *) foo;
411 struct block_device *bdev = &ei->bdev;
412
413 memset(bdev, 0, sizeof(*bdev));
414 mutex_init(&bdev->bd_mutex);
415 INIT_LIST_HEAD(&bdev->bd_inodes);
416 INIT_LIST_HEAD(&bdev->bd_list);
417#ifdef CONFIG_SYSFS
418 INIT_LIST_HEAD(&bdev->bd_holder_disks);
419#endif
420 inode_init_once(&ei->vfs_inode);
421
422 mutex_init(&bdev->bd_fsfreeze_mutex);
423}
424
425static inline void __bd_forget(struct inode *inode)
426{
427 list_del_init(&inode->i_devices);
428 inode->i_bdev = NULL;
429 inode->i_mapping = &inode->i_data;
430}
431
432static void bdev_evict_inode(struct inode *inode)
433{
434 struct block_device *bdev = &BDEV_I(inode)->bdev;
435 struct list_head *p;
436 truncate_inode_pages(&inode->i_data, 0);
437 invalidate_inode_buffers(inode);
438 clear_inode(inode);
439 spin_lock(&bdev_lock);
440 while ( (p = bdev->bd_inodes.next) != &bdev->bd_inodes ) {
441 __bd_forget(list_entry(p, struct inode, i_devices));
442 }
443 list_del_init(&bdev->bd_list);
444 spin_unlock(&bdev_lock);
445}
446
447static const struct super_operations bdev_sops = {
448 .statfs = simple_statfs,
449 .alloc_inode = bdev_alloc_inode,
450 .destroy_inode = bdev_destroy_inode,
451 .drop_inode = generic_delete_inode,
452 .evict_inode = bdev_evict_inode,
453};
454
455static struct dentry *bd_mount(struct file_system_type *fs_type,
456 int flags, const char *dev_name, void *data)
457{
458 return mount_pseudo(fs_type, "bdev:", &bdev_sops, NULL, BDEVFS_MAGIC);
459}
460
461static struct file_system_type bd_type = {
462 .name = "bdev",
463 .mount = bd_mount,
464 .kill_sb = kill_anon_super,
465};
466
467static struct super_block *blockdev_superblock __read_mostly;
468
469void __init bdev_cache_init(void)
470{
471 int err;
472 static struct vfsmount *bd_mnt;
473
474 bdev_cachep = kmem_cache_create("bdev_cache", sizeof(struct bdev_inode),
475 0, (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|
476 SLAB_MEM_SPREAD|SLAB_PANIC),
477 init_once);
478 err = register_filesystem(&bd_type);
479 if (err)
480 panic("Cannot register bdev pseudo-fs");
481 bd_mnt = kern_mount(&bd_type);
482 if (IS_ERR(bd_mnt))
483 panic("Cannot create bdev pseudo-fs");
484 blockdev_superblock = bd_mnt->mnt_sb;
485}
486
487
488
489
490
491
492static inline unsigned long hash(dev_t dev)
493{
494 return MAJOR(dev)+MINOR(dev);
495}
496
497static int bdev_test(struct inode *inode, void *data)
498{
499 return BDEV_I(inode)->bdev.bd_dev == *(dev_t *)data;
500}
501
502static int bdev_set(struct inode *inode, void *data)
503{
504 BDEV_I(inode)->bdev.bd_dev = *(dev_t *)data;
505 return 0;
506}
507
508static LIST_HEAD(all_bdevs);
509
510struct block_device *bdget(dev_t dev)
511{
512 struct block_device *bdev;
513 struct inode *inode;
514
515 inode = iget5_locked(blockdev_superblock, hash(dev),
516 bdev_test, bdev_set, &dev);
517
518 if (!inode)
519 return NULL;
520
521 bdev = &BDEV_I(inode)->bdev;
522
523 if (inode->i_state & I_NEW) {
524 bdev->bd_contains = NULL;
525 bdev->bd_super = NULL;
526 bdev->bd_inode = inode;
527 bdev->bd_block_size = (1 << inode->i_blkbits);
528 bdev->bd_part_count = 0;
529 bdev->bd_invalidated = 0;
530 inode->i_mode = S_IFBLK;
531 inode->i_rdev = dev;
532 inode->i_bdev = bdev;
533 inode->i_data.a_ops = &def_blk_aops;
534 mapping_set_gfp_mask(&inode->i_data, GFP_USER);
535 inode->i_data.backing_dev_info = &default_backing_dev_info;
536 spin_lock(&bdev_lock);
537 list_add(&bdev->bd_list, &all_bdevs);
538 spin_unlock(&bdev_lock);
539 unlock_new_inode(inode);
540 }
541 return bdev;
542}
543
544EXPORT_SYMBOL(bdget);
545
546
547
548
549
550struct block_device *bdgrab(struct block_device *bdev)
551{
552 ihold(bdev->bd_inode);
553 return bdev;
554}
555EXPORT_SYMBOL(bdgrab);
556
557long nr_blockdev_pages(void)
558{
559 struct block_device *bdev;
560 long ret = 0;
561 spin_lock(&bdev_lock);
562 list_for_each_entry(bdev, &all_bdevs, bd_list) {
563 ret += bdev->bd_inode->i_mapping->nrpages;
564 }
565 spin_unlock(&bdev_lock);
566 return ret;
567}
568
569void bdput(struct block_device *bdev)
570{
571 iput(bdev->bd_inode);
572}
573
574EXPORT_SYMBOL(bdput);
575
576static struct block_device *bd_acquire(struct inode *inode)
577{
578 struct block_device *bdev;
579
580 spin_lock(&bdev_lock);
581 bdev = inode->i_bdev;
582 if (bdev) {
583 ihold(bdev->bd_inode);
584 spin_unlock(&bdev_lock);
585 return bdev;
586 }
587 spin_unlock(&bdev_lock);
588
589 bdev = bdget(inode->i_rdev);
590 if (bdev) {
591 spin_lock(&bdev_lock);
592 if (!inode->i_bdev) {
593
594
595
596
597
598
599 ihold(bdev->bd_inode);
600 inode->i_bdev = bdev;
601 inode->i_mapping = bdev->bd_inode->i_mapping;
602 list_add(&inode->i_devices, &bdev->bd_inodes);
603 }
604 spin_unlock(&bdev_lock);
605 }
606 return bdev;
607}
608
609static inline int sb_is_blkdev_sb(struct super_block *sb)
610{
611 return sb == blockdev_superblock;
612}
613
614
615
616void bd_forget(struct inode *inode)
617{
618 struct block_device *bdev = NULL;
619
620 spin_lock(&bdev_lock);
621 if (!sb_is_blkdev_sb(inode->i_sb))
622 bdev = inode->i_bdev;
623 __bd_forget(inode);
624 spin_unlock(&bdev_lock);
625
626 if (bdev)
627 iput(bdev->bd_inode);
628}
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644static bool bd_may_claim(struct block_device *bdev, struct block_device *whole,
645 void *holder)
646{
647 if (bdev->bd_holder == holder)
648 return true;
649 else if (bdev->bd_holder != NULL)
650 return false;
651 else if (bdev->bd_contains == bdev)
652 return true;
653
654 else if (whole->bd_holder == bd_may_claim)
655 return true;
656 else if (whole->bd_holder != NULL)
657 return false;
658 else
659 return true;
660}
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680static int bd_prepare_to_claim(struct block_device *bdev,
681 struct block_device *whole, void *holder)
682{
683retry:
684
685 if (!bd_may_claim(bdev, whole, holder))
686 return -EBUSY;
687
688
689 if (whole->bd_claiming) {
690 wait_queue_head_t *wq = bit_waitqueue(&whole->bd_claiming, 0);
691 DEFINE_WAIT(wait);
692
693 prepare_to_wait(wq, &wait, TASK_UNINTERRUPTIBLE);
694 spin_unlock(&bdev_lock);
695 schedule();
696 finish_wait(wq, &wait);
697 spin_lock(&bdev_lock);
698 goto retry;
699 }
700
701
702 return 0;
703}
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728static struct block_device *bd_start_claiming(struct block_device *bdev,
729 void *holder)
730{
731 struct gendisk *disk;
732 struct block_device *whole;
733 int partno, err;
734
735 might_sleep();
736
737
738
739
740
741 disk = get_gendisk(bdev->bd_dev, &partno);
742 if (!disk)
743 return ERR_PTR(-ENXIO);
744
745
746
747
748
749
750
751
752
753 if (partno)
754 whole = bdget_disk(disk, 0);
755 else
756 whole = bdgrab(bdev);
757
758 module_put(disk->fops->owner);
759 put_disk(disk);
760 if (!whole)
761 return ERR_PTR(-ENOMEM);
762
763
764 spin_lock(&bdev_lock);
765
766 err = bd_prepare_to_claim(bdev, whole, holder);
767 if (err == 0) {
768 whole->bd_claiming = holder;
769 spin_unlock(&bdev_lock);
770 return whole;
771 } else {
772 spin_unlock(&bdev_lock);
773 bdput(whole);
774 return ERR_PTR(err);
775 }
776}
777
778#ifdef CONFIG_SYSFS
779struct bd_holder_disk {
780 struct list_head list;
781 struct gendisk *disk;
782 int refcnt;
783};
784
785static struct bd_holder_disk *bd_find_holder_disk(struct block_device *bdev,
786 struct gendisk *disk)
787{
788 struct bd_holder_disk *holder;
789
790 list_for_each_entry(holder, &bdev->bd_holder_disks, list)
791 if (holder->disk == disk)
792 return holder;
793 return NULL;
794}
795
/* Create a sysfs link inside @from that points at @to and carries @to's name. */
static int add_symlink(struct kobject *from, struct kobject *to)
{
	const char *link_name = kobject_name(to);

	return sysfs_create_link(from, to, link_name);
}
800
/* Remove the sysfs link inside @from that carries @to's name. */
static void del_symlink(struct kobject *from, struct kobject *to)
{
	const char *link_name = kobject_name(to);

	sysfs_remove_link(from, link_name);
}
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk)
835{
836 struct bd_holder_disk *holder;
837 int ret = 0;
838
839 mutex_lock(&bdev->bd_mutex);
840
841 WARN_ON_ONCE(!bdev->bd_holder);
842
843
844 if (WARN_ON(!disk->slave_dir || !bdev->bd_part->holder_dir))
845 goto out_unlock;
846
847 holder = bd_find_holder_disk(bdev, disk);
848 if (holder) {
849 holder->refcnt++;
850 goto out_unlock;
851 }
852
853 holder = kzalloc(sizeof(*holder), GFP_KERNEL);
854 if (!holder) {
855 ret = -ENOMEM;
856 goto out_unlock;
857 }
858
859 INIT_LIST_HEAD(&holder->list);
860 holder->disk = disk;
861 holder->refcnt = 1;
862
863 ret = add_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj);
864 if (ret)
865 goto out_free;
866
867 ret = add_symlink(bdev->bd_part->holder_dir, &disk_to_dev(disk)->kobj);
868 if (ret)
869 goto out_del;
870
871
872
873
874 kobject_get(bdev->bd_part->holder_dir);
875
876 list_add(&holder->list, &bdev->bd_holder_disks);
877 goto out_unlock;
878
879out_del:
880 del_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj);
881out_free:
882 kfree(holder);
883out_unlock:
884 mutex_unlock(&bdev->bd_mutex);
885 return ret;
886}
887EXPORT_SYMBOL_GPL(bd_link_disk_holder);
888
889
890
891
892
893
894
895
896
897
898
899void bd_unlink_disk_holder(struct block_device *bdev, struct gendisk *disk)
900{
901 struct bd_holder_disk *holder;
902
903 mutex_lock(&bdev->bd_mutex);
904
905 holder = bd_find_holder_disk(bdev, disk);
906
907 if (!WARN_ON_ONCE(holder == NULL) && !--holder->refcnt) {
908 del_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj);
909 del_symlink(bdev->bd_part->holder_dir,
910 &disk_to_dev(disk)->kobj);
911 kobject_put(bdev->bd_part->holder_dir);
912 list_del_init(&holder->list);
913 kfree(holder);
914 }
915
916 mutex_unlock(&bdev->bd_mutex);
917}
918EXPORT_SYMBOL_GPL(bd_unlink_disk_holder);
919#endif
920
921
922
923
924
925
926
927
928
929
930
931static void flush_disk(struct block_device *bdev, bool kill_dirty)
932{
933 if (__invalidate_device(bdev, kill_dirty)) {
934 char name[BDEVNAME_SIZE] = "";
935
936 if (bdev->bd_disk)
937 disk_name(bdev->bd_disk, 0, name);
938 printk(KERN_WARNING "VFS: busy inodes on changed media or "
939 "resized disk %s\n", name);
940 }
941
942 if (!bdev->bd_disk)
943 return;
944 if (disk_part_scan_enabled(bdev->bd_disk))
945 bdev->bd_invalidated = 1;
946}
947
948
949
950
951
952
953
954
955
956void check_disk_size_change(struct gendisk *disk, struct block_device *bdev)
957{
958 loff_t disk_size, bdev_size;
959
960 disk_size = (loff_t)get_capacity(disk) << 9;
961 bdev_size = i_size_read(bdev->bd_inode);
962 if (disk_size != bdev_size) {
963 char name[BDEVNAME_SIZE];
964
965 disk_name(disk, 0, name);
966 printk(KERN_INFO
967 "%s: detected capacity change from %lld to %lld\n",
968 name, bdev_size, disk_size);
969 i_size_write(bdev->bd_inode, disk_size);
970 flush_disk(bdev, false);
971 }
972}
973EXPORT_SYMBOL(check_disk_size_change);
974
975
976
977
978
979
980
981
982
983int revalidate_disk(struct gendisk *disk)
984{
985 struct block_device *bdev;
986 int ret = 0;
987
988 if (disk->fops->revalidate_disk)
989 ret = disk->fops->revalidate_disk(disk);
990
991 bdev = bdget_disk(disk, 0);
992 if (!bdev)
993 return ret;
994
995 mutex_lock(&bdev->bd_mutex);
996 check_disk_size_change(disk, bdev);
997 bdev->bd_invalidated = 0;
998 mutex_unlock(&bdev->bd_mutex);
999 bdput(bdev);
1000 return ret;
1001}
1002EXPORT_SYMBOL(revalidate_disk);
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013int check_disk_change(struct block_device *bdev)
1014{
1015 struct gendisk *disk = bdev->bd_disk;
1016 const struct block_device_operations *bdops = disk->fops;
1017 unsigned int events;
1018
1019 events = disk_clear_events(disk, DISK_EVENT_MEDIA_CHANGE |
1020 DISK_EVENT_EJECT_REQUEST);
1021 if (!(events & DISK_EVENT_MEDIA_CHANGE))
1022 return 0;
1023
1024 flush_disk(bdev, true);
1025 if (bdops->revalidate_disk)
1026 bdops->revalidate_disk(bdev->bd_disk);
1027 return 1;
1028}
1029
1030EXPORT_SYMBOL(check_disk_change);
1031
1032void bd_set_size(struct block_device *bdev, loff_t size)
1033{
1034 unsigned bsize = bdev_logical_block_size(bdev);
1035
1036 mutex_lock(&bdev->bd_inode->i_mutex);
1037 i_size_write(bdev->bd_inode, size);
1038 mutex_unlock(&bdev->bd_inode->i_mutex);
1039 while (bsize < PAGE_CACHE_SIZE) {
1040 if (size & bsize)
1041 break;
1042 bsize <<= 1;
1043 }
1044 bdev->bd_block_size = bsize;
1045 bdev->bd_inode->i_blkbits = blksize_bits(bsize);
1046}
1047EXPORT_SYMBOL(bd_set_size);
1048
1049static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part);
1050
1051
1052
1053
1054
1055
1056
1057
1058static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
1059{
1060 struct gendisk *disk;
1061 struct module *owner;
1062 int ret;
1063 int partno;
1064 int perm = 0;
1065
1066 if (mode & FMODE_READ)
1067 perm |= MAY_READ;
1068 if (mode & FMODE_WRITE)
1069 perm |= MAY_WRITE;
1070
1071
1072
1073 if (!for_part) {
1074 ret = devcgroup_inode_permission(bdev->bd_inode, perm);
1075 if (ret != 0) {
1076 bdput(bdev);
1077 return ret;
1078 }
1079 }
1080
1081 restart:
1082
1083 ret = -ENXIO;
1084 disk = get_gendisk(bdev->bd_dev, &partno);
1085 if (!disk)
1086 goto out;
1087 owner = disk->fops->owner;
1088
1089 disk_block_events(disk);
1090 mutex_lock_nested(&bdev->bd_mutex, for_part);
1091 if (!bdev->bd_openers) {
1092 bdev->bd_disk = disk;
1093 bdev->bd_queue = disk->queue;
1094 bdev->bd_contains = bdev;
1095 if (!partno) {
1096 struct backing_dev_info *bdi;
1097
1098 ret = -ENXIO;
1099 bdev->bd_part = disk_get_part(disk, partno);
1100 if (!bdev->bd_part)
1101 goto out_clear;
1102
1103 ret = 0;
1104 if (disk->fops->open) {
1105 ret = disk->fops->open(bdev, mode);
1106 if (ret == -ERESTARTSYS) {
1107
1108
1109
1110
1111 disk_put_part(bdev->bd_part);
1112 bdev->bd_part = NULL;
1113 bdev->bd_disk = NULL;
1114 bdev->bd_queue = NULL;
1115 mutex_unlock(&bdev->bd_mutex);
1116 disk_unblock_events(disk);
1117 put_disk(disk);
1118 module_put(owner);
1119 goto restart;
1120 }
1121 }
1122
1123 if (!ret) {
1124 bd_set_size(bdev,(loff_t)get_capacity(disk)<<9);
1125 bdi = blk_get_backing_dev_info(bdev);
1126 if (bdi == NULL)
1127 bdi = &default_backing_dev_info;
1128 bdev_inode_switch_bdi(bdev->bd_inode, bdi);
1129 }
1130
1131
1132
1133
1134
1135
1136
1137 if (bdev->bd_invalidated) {
1138 if (!ret)
1139 rescan_partitions(disk, bdev);
1140 else if (ret == -ENOMEDIUM)
1141 invalidate_partitions(disk, bdev);
1142 }
1143 if (ret)
1144 goto out_clear;
1145 } else {
1146 struct block_device *whole;
1147 whole = bdget_disk(disk, 0);
1148 ret = -ENOMEM;
1149 if (!whole)
1150 goto out_clear;
1151 BUG_ON(for_part);
1152 ret = __blkdev_get(whole, mode, 1);
1153 if (ret)
1154 goto out_clear;
1155 bdev->bd_contains = whole;
1156 bdev_inode_switch_bdi(bdev->bd_inode,
1157 whole->bd_inode->i_data.backing_dev_info);
1158 bdev->bd_part = disk_get_part(disk, partno);
1159 if (!(disk->flags & GENHD_FL_UP) ||
1160 !bdev->bd_part || !bdev->bd_part->nr_sects) {
1161 ret = -ENXIO;
1162 goto out_clear;
1163 }
1164 bd_set_size(bdev, (loff_t)bdev->bd_part->nr_sects << 9);
1165 }
1166 } else {
1167 if (bdev->bd_contains == bdev) {
1168 ret = 0;
1169 if (bdev->bd_disk->fops->open)
1170 ret = bdev->bd_disk->fops->open(bdev, mode);
1171
1172 if (bdev->bd_invalidated) {
1173 if (!ret)
1174 rescan_partitions(bdev->bd_disk, bdev);
1175 else if (ret == -ENOMEDIUM)
1176 invalidate_partitions(bdev->bd_disk, bdev);
1177 }
1178 if (ret)
1179 goto out_unlock_bdev;
1180 }
1181
1182 put_disk(disk);
1183 module_put(owner);
1184 }
1185 bdev->bd_openers++;
1186 if (for_part)
1187 bdev->bd_part_count++;
1188 mutex_unlock(&bdev->bd_mutex);
1189 disk_unblock_events(disk);
1190 return 0;
1191
1192 out_clear:
1193 disk_put_part(bdev->bd_part);
1194 bdev->bd_disk = NULL;
1195 bdev->bd_part = NULL;
1196 bdev->bd_queue = NULL;
1197 bdev_inode_switch_bdi(bdev->bd_inode, &default_backing_dev_info);
1198 if (bdev != bdev->bd_contains)
1199 __blkdev_put(bdev->bd_contains, mode, 1);
1200 bdev->bd_contains = NULL;
1201 out_unlock_bdev:
1202 mutex_unlock(&bdev->bd_mutex);
1203 disk_unblock_events(disk);
1204 put_disk(disk);
1205 module_put(owner);
1206 out:
1207 bdput(bdev);
1208
1209 return ret;
1210}
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder)
1232{
1233 struct block_device *whole = NULL;
1234 int res;
1235
1236 WARN_ON_ONCE((mode & FMODE_EXCL) && !holder);
1237
1238 if ((mode & FMODE_EXCL) && holder) {
1239 whole = bd_start_claiming(bdev, holder);
1240 if (IS_ERR(whole)) {
1241 bdput(bdev);
1242 return PTR_ERR(whole);
1243 }
1244 }
1245
1246 res = __blkdev_get(bdev, mode, 0);
1247
1248 if (whole) {
1249 struct gendisk *disk = whole->bd_disk;
1250
1251
1252 mutex_lock(&bdev->bd_mutex);
1253 spin_lock(&bdev_lock);
1254
1255 if (!res) {
1256 BUG_ON(!bd_may_claim(bdev, whole, holder));
1257
1258
1259
1260
1261
1262
1263 whole->bd_holders++;
1264 whole->bd_holder = bd_may_claim;
1265 bdev->bd_holders++;
1266 bdev->bd_holder = holder;
1267 }
1268
1269
1270 BUG_ON(whole->bd_claiming != holder);
1271 whole->bd_claiming = NULL;
1272 wake_up_bit(&whole->bd_claiming, 0);
1273
1274 spin_unlock(&bdev_lock);
1275
1276
1277
1278
1279
1280
1281
1282
1283 if (!res && (mode & FMODE_WRITE) && !bdev->bd_write_holder &&
1284 (disk->flags & GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE)) {
1285 bdev->bd_write_holder = true;
1286 disk_block_events(disk);
1287 }
1288
1289 mutex_unlock(&bdev->bd_mutex);
1290 bdput(whole);
1291 }
1292
1293 return res;
1294}
1295EXPORT_SYMBOL(blkdev_get);
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314struct block_device *blkdev_get_by_path(const char *path, fmode_t mode,
1315 void *holder)
1316{
1317 struct block_device *bdev;
1318 int err;
1319
1320 bdev = lookup_bdev(path);
1321 if (IS_ERR(bdev))
1322 return bdev;
1323
1324 err = blkdev_get(bdev, mode, holder);
1325 if (err)
1326 return ERR_PTR(err);
1327
1328 if ((mode & FMODE_WRITE) && bdev_read_only(bdev)) {
1329 blkdev_put(bdev, mode);
1330 return ERR_PTR(-EACCES);
1331 }
1332
1333 return bdev;
1334}
1335EXPORT_SYMBOL(blkdev_get_by_path);
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder)
1360{
1361 struct block_device *bdev;
1362 int err;
1363
1364 bdev = bdget(dev);
1365 if (!bdev)
1366 return ERR_PTR(-ENOMEM);
1367
1368 err = blkdev_get(bdev, mode, holder);
1369 if (err)
1370 return ERR_PTR(err);
1371
1372 return bdev;
1373}
1374EXPORT_SYMBOL(blkdev_get_by_dev);
1375
1376static int blkdev_open(struct inode * inode, struct file * filp)
1377{
1378 struct block_device *bdev;
1379
1380
1381
1382
1383
1384
1385
1386 filp->f_flags |= O_LARGEFILE;
1387
1388 if (filp->f_flags & O_NDELAY)
1389 filp->f_mode |= FMODE_NDELAY;
1390 if (filp->f_flags & O_EXCL)
1391 filp->f_mode |= FMODE_EXCL;
1392 if ((filp->f_flags & O_ACCMODE) == 3)
1393 filp->f_mode |= FMODE_WRITE_IOCTL;
1394
1395 bdev = bd_acquire(inode);
1396 if (bdev == NULL)
1397 return -ENOMEM;
1398
1399 filp->f_mapping = bdev->bd_inode->i_mapping;
1400
1401 return blkdev_get(bdev, filp->f_mode, filp);
1402}
1403
1404static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part)
1405{
1406 struct gendisk *disk = bdev->bd_disk;
1407 struct block_device *victim = NULL;
1408
1409 mutex_lock_nested(&bdev->bd_mutex, for_part);
1410 if (for_part)
1411 bdev->bd_part_count--;
1412
1413 if (!--bdev->bd_openers) {
1414 WARN_ON_ONCE(bdev->bd_holders);
1415 sync_blockdev(bdev);
1416 kill_bdev(bdev);
1417
1418
1419
1420 bdev_inode_switch_bdi(bdev->bd_inode,
1421 &default_backing_dev_info);
1422 }
1423 if (bdev->bd_contains == bdev) {
1424 if (disk->fops->release)
1425 disk->fops->release(disk, mode);
1426 }
1427 if (!bdev->bd_openers) {
1428 struct module *owner = disk->fops->owner;
1429
1430 disk_put_part(bdev->bd_part);
1431 bdev->bd_part = NULL;
1432 bdev->bd_disk = NULL;
1433 if (bdev != bdev->bd_contains)
1434 victim = bdev->bd_contains;
1435 bdev->bd_contains = NULL;
1436
1437 put_disk(disk);
1438 module_put(owner);
1439 }
1440 mutex_unlock(&bdev->bd_mutex);
1441 bdput(bdev);
1442 if (victim)
1443 __blkdev_put(victim, mode, 1);
1444}
1445
1446void blkdev_put(struct block_device *bdev, fmode_t mode)
1447{
1448 mutex_lock(&bdev->bd_mutex);
1449
1450 if (mode & FMODE_EXCL) {
1451 bool bdev_free;
1452
1453
1454
1455
1456
1457
1458 spin_lock(&bdev_lock);
1459
1460 WARN_ON_ONCE(--bdev->bd_holders < 0);
1461 WARN_ON_ONCE(--bdev->bd_contains->bd_holders < 0);
1462
1463
1464 if ((bdev_free = !bdev->bd_holders))
1465 bdev->bd_holder = NULL;
1466 if (!bdev->bd_contains->bd_holders)
1467 bdev->bd_contains->bd_holder = NULL;
1468
1469 spin_unlock(&bdev_lock);
1470
1471
1472
1473
1474
1475 if (bdev_free && bdev->bd_write_holder) {
1476 disk_unblock_events(bdev->bd_disk);
1477 bdev->bd_write_holder = false;
1478 }
1479 }
1480
1481
1482
1483
1484
1485
1486 disk_flush_events(bdev->bd_disk, DISK_EVENT_MEDIA_CHANGE);
1487
1488 mutex_unlock(&bdev->bd_mutex);
1489
1490 __blkdev_put(bdev, mode, 0);
1491}
1492EXPORT_SYMBOL(blkdev_put);
1493
1494static int blkdev_close(struct inode * inode, struct file * filp)
1495{
1496 struct block_device *bdev = I_BDEV(filp->f_mapping->host);
1497 blkdev_put(bdev, filp->f_mode);
1498 return 0;
1499}
1500
1501static long block_ioctl(struct file *file, unsigned cmd, unsigned long arg)
1502{
1503 struct block_device *bdev = I_BDEV(file->f_mapping->host);
1504 fmode_t mode = file->f_mode;
1505
1506
1507
1508
1509
1510 if (file->f_flags & O_NDELAY)
1511 mode |= FMODE_NDELAY;
1512 else
1513 mode &= ~FMODE_NDELAY;
1514
1515 return blkdev_ioctl(bdev, mode, cmd, arg);
1516}
1517
1518
1519
1520
1521
1522
1523
1524
1525ssize_t blkdev_aio_write(struct kiocb *iocb, const struct iovec *iov,
1526 unsigned long nr_segs, loff_t pos)
1527{
1528 struct file *file = iocb->ki_filp;
1529 struct blk_plug plug;
1530 ssize_t ret;
1531
1532 BUG_ON(iocb->ki_pos != pos);
1533
1534 blk_start_plug(&plug);
1535 ret = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos);
1536 if (ret > 0 || ret == -EIOCBQUEUED) {
1537 ssize_t err;
1538
1539 err = generic_write_sync(file, pos, ret);
1540 if (err < 0 && ret > 0)
1541 ret = err;
1542 }
1543 blk_finish_plug(&plug);
1544 return ret;
1545}
1546EXPORT_SYMBOL_GPL(blkdev_aio_write);
1547
1548static ssize_t blkdev_aio_read(struct kiocb *iocb, const struct iovec *iov,
1549 unsigned long nr_segs, loff_t pos)
1550{
1551 struct file *file = iocb->ki_filp;
1552 struct inode *bd_inode = file->f_mapping->host;
1553 loff_t size = i_size_read(bd_inode);
1554
1555 if (pos >= size)
1556 return 0;
1557
1558 size -= pos;
1559 if (size < iocb->ki_left)
1560 nr_segs = iov_shorten((struct iovec *)iov, nr_segs, size);
1561 return generic_file_aio_read(iocb, iov, nr_segs, pos);
1562}
1563
1564
1565
1566
1567
1568static int blkdev_releasepage(struct page *page, gfp_t wait)
1569{
1570 struct super_block *super = BDEV_I(page->mapping->host)->bdev.bd_super;
1571
1572 if (super && super->s_op->bdev_try_to_free_page)
1573 return super->s_op->bdev_try_to_free_page(super, page, wait);
1574
1575 return try_to_free_buffers(page);
1576}
1577
1578static const struct address_space_operations def_blk_aops = {
1579 .readpage = blkdev_readpage,
1580 .writepage = blkdev_writepage,
1581 .write_begin = blkdev_write_begin,
1582 .write_end = blkdev_write_end,
1583 .writepages = generic_writepages,
1584 .releasepage = blkdev_releasepage,
1585 .direct_IO = blkdev_direct_IO,
1586};
1587
1588const struct file_operations def_blk_fops = {
1589 .open = blkdev_open,
1590 .release = blkdev_close,
1591 .llseek = block_llseek,
1592 .read = do_sync_read,
1593 .write = do_sync_write,
1594 .aio_read = blkdev_aio_read,
1595 .aio_write = blkdev_aio_write,
1596 .mmap = generic_file_mmap,
1597 .fsync = blkdev_fsync,
1598 .unlocked_ioctl = block_ioctl,
1599#ifdef CONFIG_COMPAT
1600 .compat_ioctl = compat_blkdev_ioctl,
1601#endif
1602 .splice_read = generic_file_splice_read,
1603 .splice_write = generic_file_splice_write,
1604};
1605
1606int ioctl_by_bdev(struct block_device *bdev, unsigned cmd, unsigned long arg)
1607{
1608 int res;
1609 mm_segment_t old_fs = get_fs();
1610 set_fs(KERNEL_DS);
1611 res = blkdev_ioctl(bdev, 0, cmd, arg);
1612 set_fs(old_fs);
1613 return res;
1614}
1615
1616EXPORT_SYMBOL(ioctl_by_bdev);
1617
1618
1619
1620
1621
1622
1623
1624
1625
1626struct block_device *lookup_bdev(const char *pathname)
1627{
1628 struct block_device *bdev;
1629 struct inode *inode;
1630 struct path path;
1631 int error;
1632
1633 if (!pathname || !*pathname)
1634 return ERR_PTR(-EINVAL);
1635
1636 error = kern_path(pathname, LOOKUP_FOLLOW, &path);
1637 if (error)
1638 return ERR_PTR(error);
1639
1640 inode = path.dentry->d_inode;
1641 error = -ENOTBLK;
1642 if (!S_ISBLK(inode->i_mode))
1643 goto fail;
1644 error = -EACCES;
1645 if (path.mnt->mnt_flags & MNT_NODEV)
1646 goto fail;
1647 error = -ENOMEM;
1648 bdev = bd_acquire(inode);
1649 if (!bdev)
1650 goto fail;
1651out:
1652 path_put(&path);
1653 return bdev;
1654fail:
1655 bdev = ERR_PTR(error);
1656 goto out;
1657}
1658EXPORT_SYMBOL(lookup_bdev);
1659
1660int __invalidate_device(struct block_device *bdev, bool kill_dirty)
1661{
1662 struct super_block *sb = get_super(bdev);
1663 int res = 0;
1664
1665 if (sb) {
1666
1667
1668
1669
1670
1671
1672 shrink_dcache_sb(sb);
1673 res = invalidate_inodes(sb, kill_dirty);
1674 drop_super(sb);
1675 }
1676 invalidate_bdev(bdev);
1677 return res;
1678}
1679EXPORT_SYMBOL(__invalidate_device);
1680
1681void iterate_bdevs(void (*func)(struct block_device *, void *), void *arg)
1682{
1683 struct inode *inode, *old_inode = NULL;
1684
1685 spin_lock(&inode_sb_list_lock);
1686 list_for_each_entry(inode, &blockdev_superblock->s_inodes, i_sb_list) {
1687 struct address_space *mapping = inode->i_mapping;
1688
1689 spin_lock(&inode->i_lock);
1690 if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW) ||
1691 mapping->nrpages == 0) {
1692 spin_unlock(&inode->i_lock);
1693 continue;
1694 }
1695 __iget(inode);
1696 spin_unlock(&inode->i_lock);
1697 spin_unlock(&inode_sb_list_lock);
1698
1699
1700
1701
1702
1703
1704
1705
1706 iput(old_inode);
1707 old_inode = inode;
1708
1709 func(I_BDEV(inode), arg);
1710
1711 spin_lock(&inode_sb_list_lock);
1712 }
1713 spin_unlock(&inode_sb_list_lock);
1714 iput(old_inode);
1715}
1716