/*
 *  linux/fs/block_dev.c
 *
 *  Block device special-file handling: the bdev pseudo-filesystem,
 *  the block device inode cache, and the default file and
 *  address_space operations for block special files.
 */

#include <linux/init.h>
#include <linux/mm.h>
#include <linux/fcntl.h>
#include <linux/slab.h>
#include <linux/kmod.h>
#include <linux/major.h>
#include <linux/device_cgroup.h>
#include <linux/highmem.h>
#include <linux/blkdev.h>
#include <linux/module.h>
#include <linux/blkpg.h>
#include <linux/magic.h>
#include <linux/buffer_head.h>
#include <linux/swap.h>
#include <linux/pagevec.h>
#include <linux/writeback.h>
#include <linux/mpage.h>
#include <linux/mount.h>
#include <linux/uio.h>
#include <linux/namei.h>
#include <linux/log2.h>
#include <linux/cleancache.h>
#include <asm/uaccess.h>
#include "internal.h"

struct bdev_inode {
	struct block_device bdev;
	struct inode vfs_inode;
};

static const struct address_space_operations def_blk_aops;

static inline struct bdev_inode *BDEV_I(struct inode *inode)
{
	return container_of(inode, struct bdev_inode, vfs_inode);
}

inline struct block_device *I_BDEV(struct inode *inode)
{
	return &BDEV_I(inode)->bdev;
}
EXPORT_SYMBOL(I_BDEV);

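/*
 * Move the inode from its current bdi to a new bdi.  If the inode is dirty we
 * need to move it onto the dirty list of @dst so that the inode is always on
 * the right list.
 */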
static void bdev_inode_switch_bdi(struct inode *inode,
			struct backing_dev_info *dst)
{
	struct backing_dev_info *old = inode->i_data.backing_dev_info;

	if (unlikely(dst == old))		/* deadlock avoidance */
		return;
	bdi_lock_two(&old->wb, &dst->wb);
	spin_lock(&inode->i_lock);
	inode->i_data.backing_dev_info = dst;
	if (inode->i_state & I_DIRTY)
		list_move(&inode->i_wb_list, &dst->wb.b_dirty);
	spin_unlock(&inode->i_lock);
	spin_unlock(&old->wb.list_lock);
	spin_unlock(&dst->wb.list_lock);
}

sector_t blkdev_max_block(struct block_device *bdev)
{
	sector_t retval = ~((sector_t)0);
	loff_t sz = i_size_read(bdev->bd_inode);

	if (sz) {
		unsigned int size = block_size(bdev);
		unsigned int sizebits = blksize_bits(size);
		retval = (sz >> sizebits);
	}
	return retval;
}

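/* Kill _all_ buffers and pagecache, dirty or not.. */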
void kill_bdev(struct block_device *bdev)
{
	struct address_space *mapping = bdev->bd_inode->i_mapping;

	if (mapping->nrpages == 0)
		return;

	invalidate_bh_lrus();
	truncate_inode_pages(mapping, 0);
}
EXPORT_SYMBOL(kill_bdev);

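/* Invalidate clean unused buffers and pagecache. */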
void invalidate_bdev(struct block_device *bdev)
{
	struct address_space *mapping = bdev->bd_inode->i_mapping;

	if (mapping->nrpages == 0)
		return;

	invalidate_bh_lrus();
	lru_add_drain_all();	/* make sure all lru add caches are flushed */
	invalidate_mapping_pages(mapping, 0, -1);
	/* 99% of the time, we don't need to flush the cleancache on the bdev.
	 * But, for the strange corners, lets be cautious
	 */
	cleancache_invalidate_inode(mapping);
}
EXPORT_SYMBOL(invalidate_bdev);

int set_blocksize(struct block_device *bdev, int size)
{
	/* Size must be a power of two, and between 512 and PAGE_SIZE */
	if (size > PAGE_SIZE || size < 512 || !is_power_of_2(size))
		return -EINVAL;

	/* Size cannot be smaller than the size supported by the device */
	if (size < bdev_logical_block_size(bdev))
		return -EINVAL;

	/* Don't change the size if it is same as current */
	if (bdev->bd_block_size != size) {
		sync_blockdev(bdev);
		bdev->bd_block_size = size;
		bdev->bd_inode->i_blkbits = blksize_bits(size);
		kill_bdev(bdev);
	}
	return 0;
}

EXPORT_SYMBOL(set_blocksize);

int sb_set_blocksize(struct super_block *sb, int size)
{
	if (set_blocksize(sb->s_bdev, size))
		return 0;
	/* If we get here, we know size is power of two
	 * and it's value is between 512 and PAGE_SIZE */
	sb->s_blocksize = size;
	sb->s_blocksize_bits = blksize_bits(size);
	return sb->s_blocksize;
}

EXPORT_SYMBOL(sb_set_blocksize);

int sb_min_blocksize(struct super_block *sb, int size)
{
	int minsize = bdev_logical_block_size(sb->s_bdev);
	if (size < minsize)
		size = minsize;
	return sb_set_blocksize(sb, size);
}

EXPORT_SYMBOL(sb_min_blocksize);

static int
blkdev_get_block(struct inode *inode, sector_t iblock,
		struct buffer_head *bh, int create)
{
	if (iblock >= blkdev_max_block(I_BDEV(inode))) {
		if (create)
			return -EIO;

		/*
		 * for reads, we're just trying to fill a partial page.
		 * return a hole, they will have to call get_block again
		 * before they can fill it, and they will get -EIO at that
		 * time
		 */
		return 0;
	}
	bh->b_bdev = I_BDEV(inode);
	bh->b_blocknr = iblock;
	set_buffer_mapped(bh);
	return 0;
}

static int
blkdev_get_blocks(struct inode *inode, sector_t iblock,
		struct buffer_head *bh, int create)
{
	sector_t end_block = blkdev_max_block(I_BDEV(inode));
	unsigned long max_blocks = bh->b_size >> inode->i_blkbits;

	if ((iblock + max_blocks) > end_block) {
		max_blocks = end_block - iblock;
		if ((long)max_blocks <= 0) {
			if (create)
				return -EIO;	/* write fully beyond EOF */
			/*
			 * It is a read which is fully beyond EOF.  We return
			 * a !buffer_mapped buffer
			 */
			max_blocks = 0;
		}
	}

	bh->b_bdev = I_BDEV(inode);
	bh->b_blocknr = iblock;
	bh->b_size = max_blocks << inode->i_blkbits;
	if (max_blocks)
		set_buffer_mapped(bh);
	return 0;
}

static ssize_t
blkdev_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
			loff_t offset, unsigned long nr_segs)
{
	struct file *file = iocb->ki_filp;
	struct inode *inode = file->f_mapping->host;

	return __blockdev_direct_IO(rw, iocb, inode, I_BDEV(inode), iov, offset,
				    nr_segs, blkdev_get_blocks, NULL, NULL, 0);
}

int __sync_blockdev(struct block_device *bdev, int wait)
{
	if (!bdev)
		return 0;
	if (!wait)
		return filemap_flush(bdev->bd_inode->i_mapping);
	return filemap_write_and_wait(bdev->bd_inode->i_mapping);
}

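/*
 * Write out and wait upon all the dirty data associated with a block
 * device via its mapping.  Does not take the superblock lock.
 */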
int sync_blockdev(struct block_device *bdev)
{
	return __sync_blockdev(bdev, 1);
}
EXPORT_SYMBOL(sync_blockdev);

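/*
 * Write out and wait upon all dirty data associated with this
 * device.   Filesystem data as well as the underlying block
 * device.  Takes the superblock lock.
 */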
int fsync_bdev(struct block_device *bdev)
{
	struct super_block *sb = get_super(bdev);
	if (sb) {
		int res = sync_filesystem(sb);
		drop_super(sb);
		return res;
	}
	return sync_blockdev(bdev);
}
EXPORT_SYMBOL(fsync_bdev);

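/**
 * freeze_bdev  --  lock a filesystem and force it into a consistent state
 * @bdev:	blockdevice to lock
 *
 * If a superblock is found on this device, we take the s_umount semaphore
 * on it to make sure nobody unmounts until the snapshot creation is done.
 * The reference counter (bd_fsfreeze_count) guarantees that only the last
 * unfreeze process can unfreeze the frozen filesystem actually when multiple
 * freeze requests arrive simultaneously.  It counts up in freeze_bdev() and
 * count down in thaw_bdev().  When it becomes 0, thaw_bdev() will unfreeze
 * actually.
 */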
struct super_block *freeze_bdev(struct block_device *bdev)
{
	struct super_block *sb;
	int error = 0;

	mutex_lock(&bdev->bd_fsfreeze_mutex);
	if (++bdev->bd_fsfreeze_count > 1) {
		/*
		 * We don't even need to grab a reference - the first call
		 * to freeze_bdev grabs an active reference and only the last
		 * thaw_bdev drops it.
		 */
		sb = get_super(bdev);
		drop_super(sb);
		mutex_unlock(&bdev->bd_fsfreeze_mutex);
		return sb;
	}

	sb = get_active_super(bdev);
	if (!sb)
		goto out;
	error = freeze_super(sb);
	if (error) {
		deactivate_super(sb);
		bdev->bd_fsfreeze_count--;
		mutex_unlock(&bdev->bd_fsfreeze_mutex);
		return ERR_PTR(error);
	}
	deactivate_super(sb);
 out:
	sync_blockdev(bdev);
	mutex_unlock(&bdev->bd_fsfreeze_mutex);
	return sb;	/* thaw_bdev releases s->s_umount */
}
EXPORT_SYMBOL(freeze_bdev);

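/**
 * thaw_bdev  -- unlock filesystem
 * @bdev:	blockdevice to unlock
 * @sb:		associated superblock
 *
 * Unlocks the filesystem and marks it writeable again after freeze_bdev().
 */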
int thaw_bdev(struct block_device *bdev, struct super_block *sb)
{
	int error = -EINVAL;

	mutex_lock(&bdev->bd_fsfreeze_mutex);
	if (!bdev->bd_fsfreeze_count)
		goto out;

	error = 0;
	if (--bdev->bd_fsfreeze_count > 0)
		goto out;

	if (!sb)
		goto out;

	error = thaw_super(sb);
	if (error) {
		bdev->bd_fsfreeze_count++;
		mutex_unlock(&bdev->bd_fsfreeze_mutex);
		return error;
	}
out:
	mutex_unlock(&bdev->bd_fsfreeze_mutex);
	return 0;
}
EXPORT_SYMBOL(thaw_bdev);

static int blkdev_writepage(struct page *page, struct writeback_control *wbc)
{
	return block_write_full_page(page, blkdev_get_block, wbc);
}

static int blkdev_readpage(struct file * file, struct page * page)
{
	return block_read_full_page(page, blkdev_get_block);
}

static int blkdev_write_begin(struct file *file, struct address_space *mapping,
			loff_t pos, unsigned len, unsigned flags,
			struct page **pagep, void **fsdata)
{
	return block_write_begin(mapping, pos, len, flags, pagep,
				 blkdev_get_block);
}

static int blkdev_write_end(struct file *file, struct address_space *mapping,
			loff_t pos, unsigned len, unsigned copied,
			struct page *page, void *fsdata)
{
	int ret;
	ret = block_write_end(file, mapping, pos, len, copied, page, fsdata);

	unlock_page(page);
	page_cache_release(page);

	return ret;
}

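/*
 * private llseek:
 * for a block special file file->f_path.dentry->d_inode->i_size is zero
 * so we compute the size by hand (just as in block_read/write above)
 */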
static loff_t block_llseek(struct file *file, loff_t offset, int origin)
{
	struct inode *bd_inode = file->f_mapping->host;
	loff_t size;
	loff_t retval;

	mutex_lock(&bd_inode->i_mutex);
	size = i_size_read(bd_inode);

	retval = -EINVAL;
	switch (origin) {
		case SEEK_END:
			offset += size;
			break;
		case SEEK_CUR:
			offset += file->f_pos;
			/* fall through */
		case SEEK_SET:
			break;
		default:
			goto out;
	}
	if (offset >= 0 && offset <= size) {
		if (offset != file->f_pos) {
			file->f_pos = offset;
		}
		retval = offset;
	}
out:
	mutex_unlock(&bd_inode->i_mutex);
	return retval;
}

int blkdev_fsync(struct file *filp, loff_t start, loff_t end, int datasync)
{
	struct inode *bd_inode = filp->f_mapping->host;
	struct block_device *bdev = I_BDEV(bd_inode);
	int error;

	error = filemap_write_and_wait_range(filp->f_mapping, start, end);
	if (error)
		return error;

	/*
	 * There is no need to serialise calls to blkdev_issue_flush with
	 * i_mutex and doing so causes performance issues with concurrent
	 * O_SYNC writers to a block device.
	 */
	error = blkdev_issue_flush(bdev, GFP_KERNEL, NULL);
	if (error == -EOPNOTSUPP)
		error = 0;

	return error;
}
EXPORT_SYMBOL(blkdev_fsync);

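/*
 * pseudo-fs
 */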
static __cacheline_aligned_in_smp DEFINE_SPINLOCK(bdev_lock);
static struct kmem_cache * bdev_cachep __read_mostly;

static struct inode *bdev_alloc_inode(struct super_block *sb)
{
	struct bdev_inode *ei = kmem_cache_alloc(bdev_cachep, GFP_KERNEL);
	if (!ei)
		return NULL;
	return &ei->vfs_inode;
}

static void bdev_i_callback(struct rcu_head *head)
{
	struct inode *inode = container_of(head, struct inode, i_rcu);
	struct bdev_inode *bdi = BDEV_I(inode);

	kmem_cache_free(bdev_cachep, bdi);
}

static void bdev_destroy_inode(struct inode *inode)
{
	call_rcu(&inode->i_rcu, bdev_i_callback);
}

static void init_once(void *foo)
{
	struct bdev_inode *ei = (struct bdev_inode *) foo;
	struct block_device *bdev = &ei->bdev;

	memset(bdev, 0, sizeof(*bdev));
	mutex_init(&bdev->bd_mutex);
	INIT_LIST_HEAD(&bdev->bd_inodes);
	INIT_LIST_HEAD(&bdev->bd_list);
#ifdef CONFIG_SYSFS
	INIT_LIST_HEAD(&bdev->bd_holder_disks);
#endif
	inode_init_once(&ei->vfs_inode);
	/* Initialize mutex for freeze. */
	mutex_init(&bdev->bd_fsfreeze_mutex);
}

static inline void __bd_forget(struct inode *inode)
{
	list_del_init(&inode->i_devices);
	inode->i_bdev = NULL;
	inode->i_mapping = &inode->i_data;
}

static void bdev_evict_inode(struct inode *inode)
{
	struct block_device *bdev = &BDEV_I(inode)->bdev;
	struct list_head *p;
	truncate_inode_pages(&inode->i_data, 0);
	invalidate_inode_buffers(inode); /* is it needed here? */
	end_writeback(inode);
	spin_lock(&bdev_lock);
	while ( (p = bdev->bd_inodes.next) != &bdev->bd_inodes ) {
		__bd_forget(list_entry(p, struct inode, i_devices));
	}
	list_del_init(&bdev->bd_list);
	spin_unlock(&bdev_lock);
}

static const struct super_operations bdev_sops = {
	.statfs = simple_statfs,
	.alloc_inode = bdev_alloc_inode,
	.destroy_inode = bdev_destroy_inode,
	.drop_inode = generic_delete_inode,
	.evict_inode = bdev_evict_inode,
};

static struct dentry *bd_mount(struct file_system_type *fs_type,
	int flags, const char *dev_name, void *data)
{
	return mount_pseudo(fs_type, "bdev:", &bdev_sops, NULL, BDEVFS_MAGIC);
}

static struct file_system_type bd_type = {
	.name		= "bdev",
	.mount		= bd_mount,
	.kill_sb	= kill_anon_super,
};

static struct super_block *blockdev_superblock __read_mostly;

void __init bdev_cache_init(void)
{
	int err;
	static struct vfsmount *bd_mnt;

	bdev_cachep = kmem_cache_create("bdev_cache", sizeof(struct bdev_inode),
			0, (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|
				SLAB_MEM_SPREAD|SLAB_PANIC),
			init_once);
	err = register_filesystem(&bd_type);
	if (err)
		panic("Cannot register bdev pseudo-fs");
	bd_mnt = kern_mount(&bd_type);
	if (IS_ERR(bd_mnt))
		panic("Cannot create bdev pseudo-fs");
	blockdev_superblock = bd_mnt->mnt_sb;	/* For writeback */
}

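/*
 * Most likely _very_ bad one - but then it's hardly critical for small
 * /dev and can be fixed when somebody will need really large one.
 */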
static inline unsigned long hash(dev_t dev)
{
	return MAJOR(dev)+MINOR(dev);
}

static int bdev_test(struct inode *inode, void *data)
{
	return BDEV_I(inode)->bdev.bd_dev == *(dev_t *)data;
}

static int bdev_set(struct inode *inode, void *data)
{
	BDEV_I(inode)->bdev.bd_dev = *(dev_t *)data;
	return 0;
}

static LIST_HEAD(all_bdevs);

struct block_device *bdget(dev_t dev)
{
	struct block_device *bdev;
	struct inode *inode;

	inode = iget5_locked(blockdev_superblock, hash(dev),
			bdev_test, bdev_set, &dev);

	if (!inode)
		return NULL;

	bdev = &BDEV_I(inode)->bdev;

	if (inode->i_state & I_NEW) {
		bdev->bd_contains = NULL;
		bdev->bd_super = NULL;
		bdev->bd_inode = inode;
		bdev->bd_block_size = (1 << inode->i_blkbits);
		bdev->bd_part_count = 0;
		bdev->bd_invalidated = 0;
		inode->i_mode = S_IFBLK;
		inode->i_rdev = dev;
		inode->i_bdev = bdev;
		inode->i_data.a_ops = &def_blk_aops;
		mapping_set_gfp_mask(&inode->i_data, GFP_USER);
		inode->i_data.backing_dev_info = &default_backing_dev_info;
		spin_lock(&bdev_lock);
		list_add(&bdev->bd_list, &all_bdevs);
		spin_unlock(&bdev_lock);
		unlock_new_inode(inode);
	}
	return bdev;
}

EXPORT_SYMBOL(bdget);

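/**
 * bdgrab -- Grab a reference to an already referenced block device
 * @bdev:	Block device to grab a reference to.
 */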
struct block_device *bdgrab(struct block_device *bdev)
{
	ihold(bdev->bd_inode);
	return bdev;
}

long nr_blockdev_pages(void)
{
	struct block_device *bdev;
	long ret = 0;
	spin_lock(&bdev_lock);
	list_for_each_entry(bdev, &all_bdevs, bd_list) {
		ret += bdev->bd_inode->i_mapping->nrpages;
	}
	spin_unlock(&bdev_lock);
	return ret;
}

void bdput(struct block_device *bdev)
{
	iput(bdev->bd_inode);
}

EXPORT_SYMBOL(bdput);

static struct block_device *bd_acquire(struct inode *inode)
{
	struct block_device *bdev;

	spin_lock(&bdev_lock);
	bdev = inode->i_bdev;
	if (bdev) {
		ihold(bdev->bd_inode);
		spin_unlock(&bdev_lock);
		return bdev;
	}
	spin_unlock(&bdev_lock);

	bdev = bdget(inode->i_rdev);
	if (bdev) {
		spin_lock(&bdev_lock);
		if (!inode->i_bdev) {
			/*
			 * We take an additional reference to bd_inode,
			 * and it's released in clear_inode() of inode.
			 * So, we can access it via ->i_mapping always
			 * without igrab().
			 */
			ihold(bdev->bd_inode);
			inode->i_bdev = bdev;
			inode->i_mapping = bdev->bd_inode->i_mapping;
			list_add(&inode->i_devices, &bdev->bd_inodes);
		}
		spin_unlock(&bdev_lock);
	}
	return bdev;
}

static inline int sb_is_blkdev_sb(struct super_block *sb)
{
	return sb == blockdev_superblock;
}

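/* Call when you free inode */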
void bd_forget(struct inode *inode)
{
	struct block_device *bdev = NULL;

	spin_lock(&bdev_lock);
	if (inode->i_bdev) {
		if (!sb_is_blkdev_sb(inode->i_sb))
			bdev = inode->i_bdev;
		__bd_forget(inode);
	}
	spin_unlock(&bdev_lock);

	if (bdev)
		iput(bdev->bd_inode);
}

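/**
 * bd_may_claim - test whether a block device can be claimed
 * @bdev: block device of interest
 * @whole: whole block device containing @bdev, may equal @bdev
 * @holder: holder trying to claim @bdev
 *
 * Test whether @bdev can be claimed by @holder.
 *
 * CONTEXT:
 * spin_lock(&bdev_lock).
 *
 * RETURNS:
 * %true if @bdev can be claimed, %false otherwise.
 */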
static bool bd_may_claim(struct block_device *bdev, struct block_device *whole,
			 void *holder)
{
	if (bdev->bd_holder == holder)
		return true;	 /* already a holder */
	else if (bdev->bd_holder != NULL)
		return false;	 /* held by someone else */
	else if (bdev->bd_contains == bdev)
		return true;	 /* is a whole device which isn't held */

	else if (whole->bd_holder == bd_may_claim)
		return true;	 /* is a partition of a device that is being partitioned */
	else if (whole->bd_holder != NULL)
		return false;	 /* is a partition of a held device */
	else
		return true;	 /* is a partition of an un-held device */
}

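/**
 * bd_prepare_to_claim - prepare to claim a block device
 * @bdev: block device of interest
 * @whole: the whole device containing @bdev, may equal @bdev
 * @holder: holder trying to claim @bdev
 *
 * Prepare to claim @bdev.  This function fails if @bdev is already
 * claimed by another holder and waits if another claiming is in
 * progress.  This function doesn't actually claim.  On successful
 * return, the caller has ownership of bd_claiming and bd_holder[s].
 *
 * CONTEXT:
 * spin_lock(&bdev_lock).  Might release bdev_lock, sleep and regrab
 * it multiple times.
 *
 * RETURNS:
 * 0 if @bdev can be claimed, -EBUSY otherwise.
 */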
static int bd_prepare_to_claim(struct block_device *bdev,
			       struct block_device *whole, void *holder)
{
retry:
	/* if someone else claimed, fail */
	if (!bd_may_claim(bdev, whole, holder))
		return -EBUSY;

	/* if claiming is already in progress, wait for it to finish */
	if (whole->bd_claiming) {
		wait_queue_head_t *wq = bit_waitqueue(&whole->bd_claiming, 0);
		DEFINE_WAIT(wait);

		prepare_to_wait(wq, &wait, TASK_UNINTERRUPTIBLE);
		spin_unlock(&bdev_lock);
		schedule();
		finish_wait(wq, &wait);
		spin_lock(&bdev_lock);
		goto retry;
	}

	/* yay, all mine */
	return 0;
}

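/**
 * bd_start_claiming - start claiming a block device
 * @bdev: block device of interest
 * @holder: holder trying to claim @bdev
 *
 * @bdev is about to be opened exclusively.  Check @bdev can be opened
 * exclusively and mark that an exclusive open is in progress.  Each
 * successful call to this function must be matched with a call to
 * either bd_finish_claiming() or bd_abort_claiming() (which do not
 * fail).
 *
 * This function is used to gain exclusive access to the block device
 * without actually causing other exclusive open attempts to fail.  It
 * should be used when the open sequence itself requires exclusive
 * access but may subsequently fail.
 *
 * CONTEXT:
 * Might sleep.
 *
 * RETURNS:
 * Pointer to the block device containing @bdev on success, ERR_PTR()
 * value on failure.
 */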
static struct block_device *bd_start_claiming(struct block_device *bdev,
					      void *holder)
{
	struct gendisk *disk;
	struct block_device *whole;
	int partno, err;

	might_sleep();

	/*
	 * @bdev might not have been initialized properly yet, look up
	 * and grab the outer block device the hard way.
	 */
	disk = get_gendisk(bdev->bd_dev, &partno);
	if (!disk)
		return ERR_PTR(-ENXIO);

	/*
	 * Normally, @bdev should equal what's returned from bdget_disk()
	 * if partno is 0; however, some drivers (floppy) use multiple
	 * bdev's for the same physical device and @bdev may be one of the
	 * aliases.  Keep @bdev if partno is 0.  This means claimer
	 * tracking is broken for those devices but it has always been that
	 * way.
	 */
	if (partno)
		whole = bdget_disk(disk, 0);
	else
		whole = bdgrab(bdev);

	module_put(disk->fops->owner);
	put_disk(disk);
	if (!whole)
		return ERR_PTR(-ENOMEM);

	/* prepare to claim, if successful, mark claiming in progress */
	spin_lock(&bdev_lock);

	err = bd_prepare_to_claim(bdev, whole, holder);
	if (err == 0) {
		whole->bd_claiming = holder;
		spin_unlock(&bdev_lock);
		return whole;
	} else {
		spin_unlock(&bdev_lock);
		bdput(whole);
		return ERR_PTR(err);
	}
}

#ifdef CONFIG_SYSFS
struct bd_holder_disk {
	struct list_head	list;
	struct gendisk		*disk;
	int			refcnt;
};

static struct bd_holder_disk *bd_find_holder_disk(struct block_device *bdev,
						  struct gendisk *disk)
{
	struct bd_holder_disk *holder;

	list_for_each_entry(holder, &bdev->bd_holder_disks, list)
		if (holder->disk == disk)
			return holder;
	return NULL;
}

static int add_symlink(struct kobject *from, struct kobject *to)
{
	return sysfs_create_link(from, to, kobject_name(to));
}

static void del_symlink(struct kobject *from, struct kobject *to)
{
	sysfs_remove_link(from, kobject_name(to));
}

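/**
 * bd_link_disk_holder - create symlinks between holding disk and slave bdev
 * @bdev: the claimed slave bdev
 * @disk: the holding disk
 *
 * DON'T USE THIS UNLESS YOU'RE ALREADY USING IT.
 *
 * This functions creates the following sysfs symlinks.
 *
 * - from "slaves" directory of the holder @disk to the claimed @bdev
 * - from "holders" directory of the @bdev to the holder @disk
 *
 * For example, if /dev/dm-0 maps to /dev/sda and disk for dm-0 is
 * passed to bd_link_disk_holder(), then:
 *
 *   /sys/block/dm-0/slaves/sda --> /sys/block/sda
 *   /sys/block/sda/holders/dm-0 --> /sys/block/dm-0
 *
 * The caller must have claimed @bdev before calling this function and
 * ensure that both @bdev and @disk are valid during the creation and
 * lifetime of these symlinks.
 *
 * CONTEXT:
 * Might sleep.
 *
 * RETURNS:
 * 0 on success, -errno on failure.
 */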
int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk)
{
	struct bd_holder_disk *holder;
	int ret = 0;

	mutex_lock(&bdev->bd_mutex);

	WARN_ON_ONCE(!bdev->bd_holder);

	/* FIXME: remove the following once add_disk() handles errors */
	if (WARN_ON(!disk->slave_dir || !bdev->bd_part->holder_dir))
		goto out_unlock;

	holder = bd_find_holder_disk(bdev, disk);
	if (holder) {
		holder->refcnt++;
		goto out_unlock;
	}

	holder = kzalloc(sizeof(*holder), GFP_KERNEL);
	if (!holder) {
		ret = -ENOMEM;
		goto out_unlock;
	}

	INIT_LIST_HEAD(&holder->list);
	holder->disk = disk;
	holder->refcnt = 1;

	ret = add_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj);
	if (ret)
		goto out_free;

	ret = add_symlink(bdev->bd_part->holder_dir, &disk_to_dev(disk)->kobj);
	if (ret)
		goto out_del;
	/*
	 * bdev could be deleted beneath us which would implicitly destroy
	 * the holder directory.  Hold on to it.
	 */
	kobject_get(bdev->bd_part->holder_dir);

	list_add(&holder->list, &bdev->bd_holder_disks);
	goto out_unlock;

out_del:
	del_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj);
out_free:
	kfree(holder);
out_unlock:
	mutex_unlock(&bdev->bd_mutex);
	return ret;
}
EXPORT_SYMBOL_GPL(bd_link_disk_holder);

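/**
 * bd_unlink_disk_holder - destroy symlinks created by bd_link_disk_holder()
 * @bdev: the claimed slave bdev
 * @disk: the holding disk
 *
 * DON'T USE THIS UNLESS YOU'RE ALREADY USING IT.
 *
 * CONTEXT:
 * Might sleep.
 */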
void bd_unlink_disk_holder(struct block_device *bdev, struct gendisk *disk)
{
	struct bd_holder_disk *holder;

	mutex_lock(&bdev->bd_mutex);

	holder = bd_find_holder_disk(bdev, disk);

	if (!WARN_ON_ONCE(holder == NULL) && !--holder->refcnt) {
		del_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj);
		del_symlink(bdev->bd_part->holder_dir,
			    &disk_to_dev(disk)->kobj);
		kobject_put(bdev->bd_part->holder_dir);
		list_del_init(&holder->list);
		kfree(holder);
	}

	mutex_unlock(&bdev->bd_mutex);
}
EXPORT_SYMBOL_GPL(bd_unlink_disk_holder);
#endif

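/**
 * flush_disk - invalidates all buffer-cache entries on a disk
 *
 * @bdev:      struct block device to be flushed
 * @kill_dirty: flag to guide handling of dirty inodes
 *
 * Invalidates all buffer-cache entries on a disk. It should be called
 * when a disk has been changed -- either by a media change or online
 * resize.
 */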
static void flush_disk(struct block_device *bdev, bool kill_dirty)
{
	if (__invalidate_device(bdev, kill_dirty)) {
		char name[BDEVNAME_SIZE] = "";

		if (bdev->bd_disk)
			disk_name(bdev->bd_disk, 0, name);
		printk(KERN_WARNING "VFS: busy inodes on changed media or "
		       "resized disk %s\n", name);
	}

	if (!bdev->bd_disk)
		return;
	if (disk_part_scan_enabled(bdev->bd_disk))
		bdev->bd_invalidated = 1;
}

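/**
 * check_disk_size_change - checks for a disk size change and adjusts bdev size.
 * @disk: struct gendisk to check
 * @bdev: struct bdev to adjust.
 *
 * This routine checks to see if the bdev size does not match the disk size
 * and adjusts it if it differs.
 */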
void check_disk_size_change(struct gendisk *disk, struct block_device *bdev)
{
	loff_t disk_size, bdev_size;

	disk_size = (loff_t)get_capacity(disk) << 9;
	bdev_size = i_size_read(bdev->bd_inode);
	if (disk_size != bdev_size) {
		char name[BDEVNAME_SIZE];

		disk_name(disk, 0, name);
		printk(KERN_INFO
		       "%s: detected capacity change from %lld to %lld\n",
		       name, bdev_size, disk_size);
		i_size_write(bdev->bd_inode, disk_size);
		flush_disk(bdev, false);
	}
}
EXPORT_SYMBOL(check_disk_size_change);

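/**
 * revalidate_disk - wrapper for lower-level driver's revalidate_disk call-back
 * @disk: struct gendisk to be revalidated
 *
 * This routine is a wrapper for lower-level driver's revalidate_disk
 * call-backs.  It is used to do common pre and post operations needed
 * for all revalidate_disk operations.
 */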
int revalidate_disk(struct gendisk *disk)
{
	struct block_device *bdev;
	int ret = 0;

	if (disk->fops->revalidate_disk)
		ret = disk->fops->revalidate_disk(disk);

	bdev = bdget_disk(disk, 0);
	if (!bdev)
		return ret;

	mutex_lock(&bdev->bd_mutex);
	check_disk_size_change(disk, bdev);
	mutex_unlock(&bdev->bd_mutex);
	bdput(bdev);
	return ret;
}
EXPORT_SYMBOL(revalidate_disk);

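/*
 * This routine checks whether a removable media has been changed,
 * and invalidates all buffer-cache-entries in that case. This
 * is a relatively slow routine, so we have to try to minimize using
 * it. Thus it is called only upon a 'mount' or 'open'. This
 * is the best way of combining speed and utility, I think.
 * People changing diskettes in the middle of an operation deserve
 * to lose :-)
 */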
int check_disk_change(struct block_device *bdev)
{
	struct gendisk *disk = bdev->bd_disk;
	const struct block_device_operations *bdops = disk->fops;
	unsigned int events;

	events = disk_clear_events(disk, DISK_EVENT_MEDIA_CHANGE |
				   DISK_EVENT_EJECT_REQUEST);
	if (!(events & DISK_EVENT_MEDIA_CHANGE))
		return 0;

	flush_disk(bdev, true);
	if (bdops->revalidate_disk)
		bdops->revalidate_disk(bdev->bd_disk);
	return 1;
}

EXPORT_SYMBOL(check_disk_change);

void bd_set_size(struct block_device *bdev, loff_t size)
{
	unsigned bsize = bdev_logical_block_size(bdev);

	bdev->bd_inode->i_size = size;
	while (bsize < PAGE_CACHE_SIZE) {
		if (size & bsize)
			break;
		bsize <<= 1;
	}
	bdev->bd_block_size = bsize;
	bdev->bd_inode->i_blkbits = blksize_bits(bsize);
}
EXPORT_SYMBOL(bd_set_size);

static int __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part);

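/*
 * bd_mutex locking:
 *
 *  mutex_lock(part->bd_mutex)
 *    mutex_lock_nested(whole->bd_mutex, 1)
 */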
static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
{
	struct gendisk *disk;
	struct module *owner;
	int ret;
	int partno;
	int perm = 0;

	if (mode & FMODE_READ)
		perm |= MAY_READ;
	if (mode & FMODE_WRITE)
		perm |= MAY_WRITE;
	/*
	 * hooks: /n/, see "layering violations".
	 */
	if (!for_part) {
		ret = devcgroup_inode_permission(bdev->bd_inode, perm);
		if (ret != 0) {
			bdput(bdev);
			return ret;
		}
	}

 restart:

	ret = -ENXIO;
	disk = get_gendisk(bdev->bd_dev, &partno);
	if (!disk)
		goto out;
	owner = disk->fops->owner;

	disk_block_events(disk);
	mutex_lock_nested(&bdev->bd_mutex, for_part);
	if (!bdev->bd_openers) {
		bdev->bd_disk = disk;
		bdev->bd_queue = disk->queue;
		bdev->bd_contains = bdev;
		if (!partno) {
			struct backing_dev_info *bdi;

			ret = -ENXIO;
			bdev->bd_part = disk_get_part(disk, partno);
			if (!bdev->bd_part)
				goto out_clear;

			ret = 0;
			if (disk->fops->open) {
				ret = disk->fops->open(bdev, mode);
				if (ret == -ERESTARTSYS) {
					/* Lost a race with 'disk' being
					 * deleted, try again.
					 * See md.c
					 */
					disk_put_part(bdev->bd_part);
					bdev->bd_part = NULL;
					bdev->bd_disk = NULL;
					bdev->bd_queue = NULL;
					mutex_unlock(&bdev->bd_mutex);
					disk_unblock_events(disk);
					put_disk(disk);
					module_put(owner);
					goto restart;
				}
			}

			if (!ret && !bdev->bd_openers) {
				bd_set_size(bdev,(loff_t)get_capacity(disk)<<9);
				bdi = blk_get_backing_dev_info(bdev);
				if (bdi == NULL)
					bdi = &default_backing_dev_info;
				bdev_inode_switch_bdi(bdev->bd_inode, bdi);
			}

			/*
			 * If the device is invalidated, rescan partition
			 * if open succeeded or failed with -ENOMEDIUM.
			 * The latter is necessary to prevent ghost
			 * partitions on a removed medium.
			 */
			if (bdev->bd_invalidated) {
				if (!ret)
					rescan_partitions(disk, bdev);
				else if (ret == -ENOMEDIUM)
					invalidate_partitions(disk, bdev);
			}
			if (ret)
				goto out_clear;
		} else {
			struct block_device *whole;
			whole = bdget_disk(disk, 0);
			ret = -ENOMEM;
			if (!whole)
				goto out_clear;
			BUG_ON(for_part);
			ret = __blkdev_get(whole, mode, 1);
			if (ret)
				goto out_clear;
			bdev->bd_contains = whole;
			bdev_inode_switch_bdi(bdev->bd_inode,
				whole->bd_inode->i_data.backing_dev_info);
			bdev->bd_part = disk_get_part(disk, partno);
			if (!(disk->flags & GENHD_FL_UP) ||
			    !bdev->bd_part || !bdev->bd_part->nr_sects) {
				ret = -ENXIO;
				goto out_clear;
			}
			bd_set_size(bdev, (loff_t)bdev->bd_part->nr_sects << 9);
		}
	} else {
		if (bdev->bd_contains == bdev) {
			ret = 0;
			if (bdev->bd_disk->fops->open)
				ret = bdev->bd_disk->fops->open(bdev, mode);
			/* the same as first opener case, read comment there */
			if (bdev->bd_invalidated) {
				if (!ret)
					rescan_partitions(bdev->bd_disk, bdev);
				else if (ret == -ENOMEDIUM)
					invalidate_partitions(bdev->bd_disk, bdev);
			}
			if (ret)
				goto out_unlock_bdev;
		}
		/* only one opener holds refs to the module and disk */
		put_disk(disk);
		module_put(owner);
	}
	bdev->bd_openers++;
	if (for_part)
		bdev->bd_part_count++;
	mutex_unlock(&bdev->bd_mutex);
	disk_unblock_events(disk);
	return 0;

 out_clear:
	disk_put_part(bdev->bd_part);
	bdev->bd_disk = NULL;
	bdev->bd_part = NULL;
	bdev->bd_queue = NULL;
	bdev_inode_switch_bdi(bdev->bd_inode, &default_backing_dev_info);
	if (bdev != bdev->bd_contains)
		__blkdev_put(bdev->bd_contains, mode, 1);
	bdev->bd_contains = NULL;
 out_unlock_bdev:
	mutex_unlock(&bdev->bd_mutex);
	disk_unblock_events(disk);
	put_disk(disk);
	module_put(owner);
 out:
	bdput(bdev);

	return ret;
}

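/**
 * blkdev_get - open a block device
 * @bdev: block_device to open
 * @mode: FMODE_* mask
 * @holder: exclusive holder identifier
 *
 * Open @bdev with @mode.  If @mode includes %FMODE_EXCL, @bdev is
 * open with exclusive access.  Specifying %FMODE_EXCL with %NULL
 * @holder is invalid.  Exclusive opens may nest for the same @holder.
 *
 * On success, the reference count of @bdev is unchanged.  On failure,
 * @bdev is put.
 *
 * CONTEXT:
 * Might sleep.
 *
 * RETURNS:
 * 0 on success, -errno on failure.
 */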
int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder)
{
	struct block_device *whole = NULL;
	int res;

	WARN_ON_ONCE((mode & FMODE_EXCL) && !holder);

	if ((mode & FMODE_EXCL) && holder) {
		whole = bd_start_claiming(bdev, holder);
		if (IS_ERR(whole)) {
			bdput(bdev);
			return PTR_ERR(whole);
		}
	}

	res = __blkdev_get(bdev, mode, 0);

	if (whole) {
		struct gendisk *disk = whole->bd_disk;

		/* finish claiming */
		mutex_lock(&bdev->bd_mutex);
		spin_lock(&bdev_lock);

		if (!res) {
			BUG_ON(!bd_may_claim(bdev, whole, holder));
			/*
			 * Note that for a whole device bd_holders
			 * will be incremented twice, and bd_holder will
			 * be set to bd_may_claim before being set to holder
			 */
			whole->bd_holders++;
			whole->bd_holder = bd_may_claim;
			bdev->bd_holders++;
			bdev->bd_holder = holder;
		}

		/* tell others that we're done */
		BUG_ON(whole->bd_claiming != holder);
		whole->bd_claiming = NULL;
		wake_up_bit(&whole->bd_claiming, 0);

		spin_unlock(&bdev_lock);

		/*
		 * Block event polling for write claims if requested.  Any
		 * write holder makes the write_holder state stick until
		 * all are released.  This is good enough and tracking
		 * individual writeable reference is too fragile given the
		 * way @mode is used in blkdev_get/put().
		 */
		if (!res && (mode & FMODE_WRITE) && !bdev->bd_write_holder &&
		    (disk->flags & GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE)) {
			bdev->bd_write_holder = true;
			disk_block_events(disk);
		}

		mutex_unlock(&bdev->bd_mutex);
		bdput(whole);
	}

	return res;
}
EXPORT_SYMBOL(blkdev_get);

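/**
 * blkdev_get_by_path - open a block device by name
 * @path: path to the block device to open
 * @mode: FMODE_* mask
 * @holder: exclusive holder identifier
 *
 * Open the blockdevice described by the device file at @path.  @mode
 * and @holder are identical to blkdev_get().
 *
 * On success, the returned block_device has reference count of one.
 *
 * CONTEXT:
 * Might sleep.
 *
 * RETURNS:
 * Pointer to block_device on success, ERR_PTR(-errno) on failure.
 */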
struct block_device *blkdev_get_by_path(const char *path, fmode_t mode,
					void *holder)
{
	struct block_device *bdev;
	int err;

	bdev = lookup_bdev(path);
	if (IS_ERR(bdev))
		return bdev;

	err = blkdev_get(bdev, mode, holder);
	if (err)
		return ERR_PTR(err);

	if ((mode & FMODE_WRITE) && bdev_read_only(bdev)) {
		blkdev_put(bdev, mode);
		return ERR_PTR(-EACCES);
	}

	return bdev;
}
EXPORT_SYMBOL(blkdev_get_by_path);

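/**
 * blkdev_get_by_dev - open a block device by device number
 * @dev: device number of block device to open
 * @mode: FMODE_* mask
 * @holder: exclusive holder identifier
 *
 * Open the blockdevice described by device number @dev.  @mode and
 * @holder are identical to blkdev_get().
 *
 * Use it ONLY if you really do not have anything better - i.e. when
 * you are behind a truly sucky interface and all you are given is a
 * device number.  _Never_ to be used for internal purposes.  If you
 * ever need it - reconsider your API.
 *
 * On success, the returned block_device has reference count of one.
 *
 * CONTEXT:
 * Might sleep.
 *
 * RETURNS:
 * Pointer to block_device on success, ERR_PTR(-errno) on failure.
 */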
struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder)
{
	struct block_device *bdev;
	int err;

	bdev = bdget(dev);
	if (!bdev)
		return ERR_PTR(-ENOMEM);

	err = blkdev_get(bdev, mode, holder);
	if (err)
		return ERR_PTR(err);

	return bdev;
}
EXPORT_SYMBOL(blkdev_get_by_dev);

static int blkdev_open(struct inode * inode, struct file * filp)
{
	struct block_device *bdev;

	/*
	 * Preserve backwards compatibility and allow large file access
	 * even if userspace doesn't ask for it explicitly. Some mkfs
	 * binary needs it. We might want to drop this workaround
	 * during an unstable branch.
	 */
	filp->f_flags |= O_LARGEFILE;

	if (filp->f_flags & O_NDELAY)
		filp->f_mode |= FMODE_NDELAY;
	if (filp->f_flags & O_EXCL)
		filp->f_mode |= FMODE_EXCL;
	if ((filp->f_flags & O_ACCMODE) == 3)
		filp->f_mode |= FMODE_WRITE_IOCTL;

	bdev = bd_acquire(inode);
	if (bdev == NULL)
		return -ENOMEM;

	filp->f_mapping = bdev->bd_inode->i_mapping;

	return blkdev_get(bdev, filp->f_mode, filp);
}

static int __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part)
{
	int ret = 0;
	struct gendisk *disk = bdev->bd_disk;
	struct block_device *victim = NULL;

	mutex_lock_nested(&bdev->bd_mutex, for_part);
	if (for_part)
		bdev->bd_part_count--;

	if (!--bdev->bd_openers) {
		WARN_ON_ONCE(bdev->bd_holders);
		sync_blockdev(bdev);
		kill_bdev(bdev);
		/* ->release can cause the old bdi to disappear,
		 * so must switch it out first
		 */
		bdev_inode_switch_bdi(bdev->bd_inode,
					&default_backing_dev_info);
	}
	if (bdev->bd_contains == bdev) {
		if (disk->fops->release)
			ret = disk->fops->release(disk, mode);
	}
	if (!bdev->bd_openers) {
		struct module *owner = disk->fops->owner;

		disk_put_part(bdev->bd_part);
		bdev->bd_part = NULL;
		bdev->bd_disk = NULL;
		if (bdev != bdev->bd_contains)
			victim = bdev->bd_contains;
		bdev->bd_contains = NULL;

		put_disk(disk);
		module_put(owner);
	}
	mutex_unlock(&bdev->bd_mutex);
	bdput(bdev);
	if (victim)
		__blkdev_put(victim, mode, 1);
	return ret;
}

int blkdev_put(struct block_device *bdev, fmode_t mode)
{
	mutex_lock(&bdev->bd_mutex);

	if (mode & FMODE_EXCL) {
		bool bdev_free;

		/*
		 * Release a claim on the device.  The holder fields
		 * are protected with bdev_lock.  bd_mutex is to
		 * synchronize disk_holder unlinking but not used here.
		 */
		spin_lock(&bdev_lock);

		WARN_ON_ONCE(--bdev->bd_holders < 0);
		WARN_ON_ONCE(--bdev->bd_contains->bd_holders < 0);

		/* bd_contains might point to self, check in a separate step */
		if ((bdev_free = !bdev->bd_holders))
			bdev->bd_holder = NULL;
		if (!bdev->bd_contains->bd_holders)
			bdev->bd_contains->bd_holder = NULL;

		spin_unlock(&bdev_lock);

		/*
		 * If this was the last claim, remove holder link and
		 * unblock event polling if it was a write holder.
		 */
		if (bdev_free && bdev->bd_write_holder) {
			disk_unblock_events(bdev->bd_disk);
			bdev->bd_write_holder = false;
		}
	}

	/*
	 * Trigger event checking and tell drivers to flush MEDIA_CHANGE
	 * event.  This is to ensure detection of media removal commanded
	 * from userland - e.g. eject(1).
	 */
	disk_flush_events(bdev->bd_disk, DISK_EVENT_MEDIA_CHANGE);

	mutex_unlock(&bdev->bd_mutex);

	return __blkdev_put(bdev, mode, 0);
}
EXPORT_SYMBOL(blkdev_put);

static int blkdev_close(struct inode * inode, struct file * filp)
{
	struct block_device *bdev = I_BDEV(filp->f_mapping->host);

	return blkdev_put(bdev, filp->f_mode);
}

static long block_ioctl(struct file *file, unsigned cmd, unsigned long arg)
{
	struct block_device *bdev = I_BDEV(file->f_mapping->host);
	fmode_t mode = file->f_mode;

	/*
	 * O_NDELAY can be altered using fcntl(.., F_SETFL, ..), so we have
	 * to update it before every ioctl.
	 */
	if (file->f_flags & O_NDELAY)
		mode |= FMODE_NDELAY;
	else
		mode &= ~FMODE_NDELAY;

	return blkdev_ioctl(bdev, mode, cmd, arg);
}

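/*
 * Write data to the block device.  Only intended for the block device itself
 * and the raw driver which basically is a fake block device.
 *
 * Does not take i_mutex for the write and thus is not for general purpose
 * use.
 */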
ssize_t blkdev_aio_write(struct kiocb *iocb, const struct iovec *iov,
			 unsigned long nr_segs, loff_t pos)
{
	struct file *file = iocb->ki_filp;
	ssize_t ret;

	BUG_ON(iocb->ki_pos != pos);

	ret = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos);
	if (ret > 0 || ret == -EIOCBQUEUED) {
		ssize_t err;

		err = generic_write_sync(file, pos, ret);
		if (err < 0 && ret > 0)
			ret = err;
	}
	return ret;
}
EXPORT_SYMBOL_GPL(blkdev_aio_write);

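/*
 * Try to release a page associated with block device when the system
 * is under memory pressure.
 */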
static int blkdev_releasepage(struct page *page, gfp_t wait)
{
	struct super_block *super = BDEV_I(page->mapping->host)->bdev.bd_super;

	if (super && super->s_op->bdev_try_to_free_page)
		return super->s_op->bdev_try_to_free_page(super, page, wait);

	return try_to_free_buffers(page);
}

static const struct address_space_operations def_blk_aops = {
	.readpage	= blkdev_readpage,
	.writepage	= blkdev_writepage,
	.write_begin	= blkdev_write_begin,
	.write_end	= blkdev_write_end,
	.writepages	= generic_writepages,
	.releasepage	= blkdev_releasepage,
	.direct_IO	= blkdev_direct_IO,
};

const struct file_operations def_blk_fops = {
	.open		= blkdev_open,
	.release	= blkdev_close,
	.llseek		= block_llseek,
	.read		= do_sync_read,
	.write		= do_sync_write,
	.aio_read	= generic_file_aio_read,
	.aio_write	= blkdev_aio_write,
	.mmap		= generic_file_mmap,
	.fsync		= blkdev_fsync,
	.unlocked_ioctl	= block_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl	= compat_blkdev_ioctl,
#endif
	.splice_read	= generic_file_splice_read,
	.splice_write	= generic_file_splice_write,
};

int ioctl_by_bdev(struct block_device *bdev, unsigned cmd, unsigned long arg)
{
	int res;
	mm_segment_t old_fs = get_fs();
	set_fs(KERNEL_DS);
	res = blkdev_ioctl(bdev, 0, cmd, arg);
	set_fs(old_fs);
	return res;
}

EXPORT_SYMBOL(ioctl_by_bdev);

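/**
 * lookup_bdev  - lookup a struct block_device by name
 * @pathname:	special file representing the block device
 *
 * Get a reference to the blockdevice at @pathname in the current
 * namespace if possible and return it.  Return ERR_PTR(error)
 * otherwise.
 */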
struct block_device *lookup_bdev(const char *pathname)
{
	struct block_device *bdev;
	struct inode *inode;
	struct path path;
	int error;

	if (!pathname || !*pathname)
		return ERR_PTR(-EINVAL);

	error = kern_path(pathname, LOOKUP_FOLLOW, &path);
	if (error)
		return ERR_PTR(error);

	inode = path.dentry->d_inode;
	error = -ENOTBLK;
	if (!S_ISBLK(inode->i_mode))
		goto fail;
	error = -EACCES;
	if (path.mnt->mnt_flags & MNT_NODEV)
		goto fail;
	error = -ENOMEM;
	bdev = bd_acquire(inode);
	if (!bdev)
		goto fail;
out:
	path_put(&path);
	return bdev;
fail:
	bdev = ERR_PTR(error);
	goto out;
}
EXPORT_SYMBOL(lookup_bdev);

int __invalidate_device(struct block_device *bdev, bool kill_dirty)
{
	struct super_block *sb = get_super(bdev);
	int res = 0;

	if (sb) {
		/*
		 * no need to lock the super, get_super holds the
		 * read mutex so the filesystem cannot go away
		 * under us (->put_super runs with the write lock
		 * hold).
		 */
		shrink_dcache_sb(sb);
		res = invalidate_inodes(sb, kill_dirty);
		drop_super(sb);
	}
	invalidate_bdev(bdev);
	return res;
}
EXPORT_SYMBOL(__invalidate_device);