1
2
3
4
5
6
7
8#include <linux/init.h>
9#include <linux/mm.h>
10#include <linux/fcntl.h>
11#include <linux/slab.h>
12#include <linux/kmod.h>
13#include <linux/major.h>
14#include <linux/device_cgroup.h>
15#include <linux/highmem.h>
16#include <linux/blkdev.h>
17#include <linux/module.h>
18#include <linux/blkpg.h>
19#include <linux/buffer_head.h>
20#include <linux/pagevec.h>
21#include <linux/writeback.h>
22#include <linux/mpage.h>
23#include <linux/mount.h>
24#include <linux/uio.h>
25#include <linux/namei.h>
26#include <linux/log2.h>
27#include <linux/kmemleak.h>
28#include <asm/uaccess.h>
29#include "internal.h"
30
31struct bdev_inode {
32 struct block_device bdev;
33 struct inode vfs_inode;
34};
35
36static const struct address_space_operations def_blk_aops;
37
38static inline struct bdev_inode *BDEV_I(struct inode *inode)
39{
40 return container_of(inode, struct bdev_inode, vfs_inode);
41}
42
43inline struct block_device *I_BDEV(struct inode *inode)
44{
45 return &BDEV_I(inode)->bdev;
46}
47
48EXPORT_SYMBOL(I_BDEV);
49
50
51
52
53
54
55static void bdev_inode_switch_bdi(struct inode *inode,
56 struct backing_dev_info *dst)
57{
58 spin_lock(&inode_lock);
59 inode->i_data.backing_dev_info = dst;
60 if (inode->i_state & I_DIRTY)
61 list_move(&inode->i_wb_list, &dst->wb.b_dirty);
62 spin_unlock(&inode_lock);
63}
64
65static sector_t max_block(struct block_device *bdev)
66{
67 sector_t retval = ~((sector_t)0);
68 loff_t sz = i_size_read(bdev->bd_inode);
69
70 if (sz) {
71 unsigned int size = block_size(bdev);
72 unsigned int sizebits = blksize_bits(size);
73 retval = (sz >> sizebits);
74 }
75 return retval;
76}
77
78
79static void kill_bdev(struct block_device *bdev)
80{
81 if (bdev->bd_inode->i_mapping->nrpages == 0)
82 return;
83 invalidate_bh_lrus();
84 truncate_inode_pages(bdev->bd_inode->i_mapping, 0);
85}
86
87int set_blocksize(struct block_device *bdev, int size)
88{
89
90 if (size > PAGE_SIZE || size < 512 || !is_power_of_2(size))
91 return -EINVAL;
92
93
94 if (size < bdev_logical_block_size(bdev))
95 return -EINVAL;
96
97
98 if (bdev->bd_block_size != size) {
99 sync_blockdev(bdev);
100 bdev->bd_block_size = size;
101 bdev->bd_inode->i_blkbits = blksize_bits(size);
102 kill_bdev(bdev);
103 }
104 return 0;
105}
106
107EXPORT_SYMBOL(set_blocksize);
108
109int sb_set_blocksize(struct super_block *sb, int size)
110{
111 if (set_blocksize(sb->s_bdev, size))
112 return 0;
113
114
115 sb->s_blocksize = size;
116 sb->s_blocksize_bits = blksize_bits(size);
117 return sb->s_blocksize;
118}
119
120EXPORT_SYMBOL(sb_set_blocksize);
121
122int sb_min_blocksize(struct super_block *sb, int size)
123{
124 int minsize = bdev_logical_block_size(sb->s_bdev);
125 if (size < minsize)
126 size = minsize;
127 return sb_set_blocksize(sb, size);
128}
129
130EXPORT_SYMBOL(sb_min_blocksize);
131
132static int
133blkdev_get_block(struct inode *inode, sector_t iblock,
134 struct buffer_head *bh, int create)
135{
136 if (iblock >= max_block(I_BDEV(inode))) {
137 if (create)
138 return -EIO;
139
140
141
142
143
144
145
146 return 0;
147 }
148 bh->b_bdev = I_BDEV(inode);
149 bh->b_blocknr = iblock;
150 set_buffer_mapped(bh);
151 return 0;
152}
153
154static int
155blkdev_get_blocks(struct inode *inode, sector_t iblock,
156 struct buffer_head *bh, int create)
157{
158 sector_t end_block = max_block(I_BDEV(inode));
159 unsigned long max_blocks = bh->b_size >> inode->i_blkbits;
160
161 if ((iblock + max_blocks) > end_block) {
162 max_blocks = end_block - iblock;
163 if ((long)max_blocks <= 0) {
164 if (create)
165 return -EIO;
166
167
168
169
170 max_blocks = 0;
171 }
172 }
173
174 bh->b_bdev = I_BDEV(inode);
175 bh->b_blocknr = iblock;
176 bh->b_size = max_blocks << inode->i_blkbits;
177 if (max_blocks)
178 set_buffer_mapped(bh);
179 return 0;
180}
181
182static ssize_t
183blkdev_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
184 loff_t offset, unsigned long nr_segs)
185{
186 struct file *file = iocb->ki_filp;
187 struct inode *inode = file->f_mapping->host;
188
189 return __blockdev_direct_IO(rw, iocb, inode, I_BDEV(inode), iov, offset,
190 nr_segs, blkdev_get_blocks, NULL, NULL, 0);
191}
192
193int __sync_blockdev(struct block_device *bdev, int wait)
194{
195 if (!bdev)
196 return 0;
197 if (!wait)
198 return filemap_flush(bdev->bd_inode->i_mapping);
199 return filemap_write_and_wait(bdev->bd_inode->i_mapping);
200}
201
202
203
204
205
/*
 * Write out and wait upon all dirty data associated with @bdev's page
 * cache; a thin wrapper around __sync_blockdev() with waiting enabled.
 */
int sync_blockdev(struct block_device *bdev)
{
	const int wait = 1;

	return __sync_blockdev(bdev, wait);
}
EXPORT_SYMBOL(sync_blockdev);
211
212
213
214
215
216
/*
 * Sync whatever lives on @bdev: if a superblock is found for the device the
 * whole filesystem is synced via sync_filesystem(), otherwise only the
 * block device's own page cache is written out.
 */
int fsync_bdev(struct block_device *bdev)
{
	struct super_block *sb = get_super(bdev);
	int res;

	if (!sb)
		return sync_blockdev(bdev);

	res = sync_filesystem(sb);
	drop_super(sb);
	return res;
}
EXPORT_SYMBOL(fsync_bdev);
228
229
230
231
232
233
234
235
236
237
238
239
240
241struct super_block *freeze_bdev(struct block_device *bdev)
242{
243 struct super_block *sb;
244 int error = 0;
245
246 mutex_lock(&bdev->bd_fsfreeze_mutex);
247 if (++bdev->bd_fsfreeze_count > 1) {
248
249
250
251
252
253 sb = get_super(bdev);
254 drop_super(sb);
255 mutex_unlock(&bdev->bd_fsfreeze_mutex);
256 return sb;
257 }
258
259 sb = get_active_super(bdev);
260 if (!sb)
261 goto out;
262 error = freeze_super(sb);
263 if (error) {
264 deactivate_super(sb);
265 bdev->bd_fsfreeze_count--;
266 mutex_unlock(&bdev->bd_fsfreeze_mutex);
267 return ERR_PTR(error);
268 }
269 deactivate_super(sb);
270 out:
271 sync_blockdev(bdev);
272 mutex_unlock(&bdev->bd_fsfreeze_mutex);
273 return sb;
274}
275EXPORT_SYMBOL(freeze_bdev);
276
277
278
279
280
281
282
283
284int thaw_bdev(struct block_device *bdev, struct super_block *sb)
285{
286 int error = -EINVAL;
287
288 mutex_lock(&bdev->bd_fsfreeze_mutex);
289 if (!bdev->bd_fsfreeze_count)
290 goto out;
291
292 error = 0;
293 if (--bdev->bd_fsfreeze_count > 0)
294 goto out;
295
296 if (!sb)
297 goto out;
298
299 error = thaw_super(sb);
300 if (error) {
301 bdev->bd_fsfreeze_count++;
302 mutex_unlock(&bdev->bd_fsfreeze_mutex);
303 return error;
304 }
305out:
306 mutex_unlock(&bdev->bd_fsfreeze_mutex);
307 return 0;
308}
309EXPORT_SYMBOL(thaw_bdev);
310
311static int blkdev_writepage(struct page *page, struct writeback_control *wbc)
312{
313 return block_write_full_page(page, blkdev_get_block, wbc);
314}
315
316static int blkdev_readpage(struct file * file, struct page * page)
317{
318 return block_read_full_page(page, blkdev_get_block);
319}
320
321static int blkdev_write_begin(struct file *file, struct address_space *mapping,
322 loff_t pos, unsigned len, unsigned flags,
323 struct page **pagep, void **fsdata)
324{
325 return block_write_begin(mapping, pos, len, flags, pagep,
326 blkdev_get_block);
327}
328
329static int blkdev_write_end(struct file *file, struct address_space *mapping,
330 loff_t pos, unsigned len, unsigned copied,
331 struct page *page, void *fsdata)
332{
333 int ret;
334 ret = block_write_end(file, mapping, pos, len, copied, page, fsdata);
335
336 unlock_page(page);
337 page_cache_release(page);
338
339 return ret;
340}
341
342
343
344
345
346
347static loff_t block_llseek(struct file *file, loff_t offset, int origin)
348{
349 struct inode *bd_inode = file->f_mapping->host;
350 loff_t size;
351 loff_t retval;
352
353 mutex_lock(&bd_inode->i_mutex);
354 size = i_size_read(bd_inode);
355
356 switch (origin) {
357 case 2:
358 offset += size;
359 break;
360 case 1:
361 offset += file->f_pos;
362 }
363 retval = -EINVAL;
364 if (offset >= 0 && offset <= size) {
365 if (offset != file->f_pos) {
366 file->f_pos = offset;
367 }
368 retval = offset;
369 }
370 mutex_unlock(&bd_inode->i_mutex);
371 return retval;
372}
373
374int blkdev_fsync(struct file *filp, int datasync)
375{
376 struct inode *bd_inode = filp->f_mapping->host;
377 struct block_device *bdev = I_BDEV(bd_inode);
378 int error;
379
380
381
382
383
384
385 mutex_unlock(&bd_inode->i_mutex);
386
387 error = blkdev_issue_flush(bdev, GFP_KERNEL, NULL);
388 if (error == -EOPNOTSUPP)
389 error = 0;
390
391 mutex_lock(&bd_inode->i_mutex);
392
393 return error;
394}
395EXPORT_SYMBOL(blkdev_fsync);
396
397
398
399
400
401static __cacheline_aligned_in_smp DEFINE_SPINLOCK(bdev_lock);
402static struct kmem_cache * bdev_cachep __read_mostly;
403
404static struct inode *bdev_alloc_inode(struct super_block *sb)
405{
406 struct bdev_inode *ei = kmem_cache_alloc(bdev_cachep, GFP_KERNEL);
407 if (!ei)
408 return NULL;
409 return &ei->vfs_inode;
410}
411
412static void bdev_i_callback(struct rcu_head *head)
413{
414 struct inode *inode = container_of(head, struct inode, i_rcu);
415 struct bdev_inode *bdi = BDEV_I(inode);
416
417 INIT_LIST_HEAD(&inode->i_dentry);
418 kmem_cache_free(bdev_cachep, bdi);
419}
420
421static void bdev_destroy_inode(struct inode *inode)
422{
423 call_rcu(&inode->i_rcu, bdev_i_callback);
424}
425
426static void init_once(void *foo)
427{
428 struct bdev_inode *ei = (struct bdev_inode *) foo;
429 struct block_device *bdev = &ei->bdev;
430
431 memset(bdev, 0, sizeof(*bdev));
432 mutex_init(&bdev->bd_mutex);
433 INIT_LIST_HEAD(&bdev->bd_inodes);
434 INIT_LIST_HEAD(&bdev->bd_list);
435#ifdef CONFIG_SYSFS
436 INIT_LIST_HEAD(&bdev->bd_holder_disks);
437#endif
438 inode_init_once(&ei->vfs_inode);
439
440 mutex_init(&bdev->bd_fsfreeze_mutex);
441}
442
443static inline void __bd_forget(struct inode *inode)
444{
445 list_del_init(&inode->i_devices);
446 inode->i_bdev = NULL;
447 inode->i_mapping = &inode->i_data;
448}
449
450static void bdev_evict_inode(struct inode *inode)
451{
452 struct block_device *bdev = &BDEV_I(inode)->bdev;
453 struct list_head *p;
454 truncate_inode_pages(&inode->i_data, 0);
455 invalidate_inode_buffers(inode);
456 end_writeback(inode);
457 spin_lock(&bdev_lock);
458 while ( (p = bdev->bd_inodes.next) != &bdev->bd_inodes ) {
459 __bd_forget(list_entry(p, struct inode, i_devices));
460 }
461 list_del_init(&bdev->bd_list);
462 spin_unlock(&bdev_lock);
463}
464
465static const struct super_operations bdev_sops = {
466 .statfs = simple_statfs,
467 .alloc_inode = bdev_alloc_inode,
468 .destroy_inode = bdev_destroy_inode,
469 .drop_inode = generic_delete_inode,
470 .evict_inode = bdev_evict_inode,
471};
472
473static struct dentry *bd_mount(struct file_system_type *fs_type,
474 int flags, const char *dev_name, void *data)
475{
476 return mount_pseudo(fs_type, "bdev:", &bdev_sops, NULL, 0x62646576);
477}
478
479static struct file_system_type bd_type = {
480 .name = "bdev",
481 .mount = bd_mount,
482 .kill_sb = kill_anon_super,
483};
484
485struct super_block *blockdev_superblock __read_mostly;
486
487void __init bdev_cache_init(void)
488{
489 int err;
490 struct vfsmount *bd_mnt;
491
492 bdev_cachep = kmem_cache_create("bdev_cache", sizeof(struct bdev_inode),
493 0, (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|
494 SLAB_MEM_SPREAD|SLAB_PANIC),
495 init_once);
496 err = register_filesystem(&bd_type);
497 if (err)
498 panic("Cannot register bdev pseudo-fs");
499 bd_mnt = kern_mount(&bd_type);
500 if (IS_ERR(bd_mnt))
501 panic("Cannot create bdev pseudo-fs");
502
503
504
505
506 kmemleak_not_leak(bd_mnt);
507 blockdev_superblock = bd_mnt->mnt_sb;
508}
509
510
511
512
513
514
515static inline unsigned long hash(dev_t dev)
516{
517 return MAJOR(dev)+MINOR(dev);
518}
519
520static int bdev_test(struct inode *inode, void *data)
521{
522 return BDEV_I(inode)->bdev.bd_dev == *(dev_t *)data;
523}
524
525static int bdev_set(struct inode *inode, void *data)
526{
527 BDEV_I(inode)->bdev.bd_dev = *(dev_t *)data;
528 return 0;
529}
530
/* Every block_device set up by bdget(), linked via bd_list under bdev_lock. */
static LIST_HEAD(all_bdevs);
532
533struct block_device *bdget(dev_t dev)
534{
535 struct block_device *bdev;
536 struct inode *inode;
537
538 inode = iget5_locked(blockdev_superblock, hash(dev),
539 bdev_test, bdev_set, &dev);
540
541 if (!inode)
542 return NULL;
543
544 bdev = &BDEV_I(inode)->bdev;
545
546 if (inode->i_state & I_NEW) {
547 bdev->bd_contains = NULL;
548 bdev->bd_inode = inode;
549 bdev->bd_block_size = (1 << inode->i_blkbits);
550 bdev->bd_part_count = 0;
551 bdev->bd_invalidated = 0;
552 inode->i_mode = S_IFBLK;
553 inode->i_rdev = dev;
554 inode->i_bdev = bdev;
555 inode->i_data.a_ops = &def_blk_aops;
556 mapping_set_gfp_mask(&inode->i_data, GFP_USER);
557 inode->i_data.backing_dev_info = &default_backing_dev_info;
558 spin_lock(&bdev_lock);
559 list_add(&bdev->bd_list, &all_bdevs);
560 spin_unlock(&bdev_lock);
561 unlock_new_inode(inode);
562 }
563 return bdev;
564}
565
566EXPORT_SYMBOL(bdget);
567
568
569
570
571
572struct block_device *bdgrab(struct block_device *bdev)
573{
574 ihold(bdev->bd_inode);
575 return bdev;
576}
577
578long nr_blockdev_pages(void)
579{
580 struct block_device *bdev;
581 long ret = 0;
582 spin_lock(&bdev_lock);
583 list_for_each_entry(bdev, &all_bdevs, bd_list) {
584 ret += bdev->bd_inode->i_mapping->nrpages;
585 }
586 spin_unlock(&bdev_lock);
587 return ret;
588}
589
590void bdput(struct block_device *bdev)
591{
592 iput(bdev->bd_inode);
593}
594
595EXPORT_SYMBOL(bdput);
596
597static struct block_device *bd_acquire(struct inode *inode)
598{
599 struct block_device *bdev;
600
601 spin_lock(&bdev_lock);
602 bdev = inode->i_bdev;
603 if (bdev) {
604 ihold(bdev->bd_inode);
605 spin_unlock(&bdev_lock);
606 return bdev;
607 }
608 spin_unlock(&bdev_lock);
609
610 bdev = bdget(inode->i_rdev);
611 if (bdev) {
612 spin_lock(&bdev_lock);
613 if (!inode->i_bdev) {
614
615
616
617
618
619
620 ihold(bdev->bd_inode);
621 inode->i_bdev = bdev;
622 inode->i_mapping = bdev->bd_inode->i_mapping;
623 list_add(&inode->i_devices, &bdev->bd_inodes);
624 }
625 spin_unlock(&bdev_lock);
626 }
627 return bdev;
628}
629
630
631
632void bd_forget(struct inode *inode)
633{
634 struct block_device *bdev = NULL;
635
636 spin_lock(&bdev_lock);
637 if (inode->i_bdev) {
638 if (!sb_is_blkdev_sb(inode->i_sb))
639 bdev = inode->i_bdev;
640 __bd_forget(inode);
641 }
642 spin_unlock(&bdev_lock);
643
644 if (bdev)
645 iput(bdev->bd_inode);
646}
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662static bool bd_may_claim(struct block_device *bdev, struct block_device *whole,
663 void *holder)
664{
665 if (bdev->bd_holder == holder)
666 return true;
667 else if (bdev->bd_holder != NULL)
668 return false;
669 else if (bdev->bd_contains == bdev)
670 return true;
671
672 else if (whole->bd_holder == bd_may_claim)
673 return true;
674 else if (whole->bd_holder != NULL)
675 return false;
676 else
677 return true;
678}
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698static int bd_prepare_to_claim(struct block_device *bdev,
699 struct block_device *whole, void *holder)
700{
701retry:
702
703 if (!bd_may_claim(bdev, whole, holder))
704 return -EBUSY;
705
706
707 if (whole->bd_claiming) {
708 wait_queue_head_t *wq = bit_waitqueue(&whole->bd_claiming, 0);
709 DEFINE_WAIT(wait);
710
711 prepare_to_wait(wq, &wait, TASK_UNINTERRUPTIBLE);
712 spin_unlock(&bdev_lock);
713 schedule();
714 finish_wait(wq, &wait);
715 spin_lock(&bdev_lock);
716 goto retry;
717 }
718
719
720 return 0;
721}
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746static struct block_device *bd_start_claiming(struct block_device *bdev,
747 void *holder)
748{
749 struct gendisk *disk;
750 struct block_device *whole;
751 int partno, err;
752
753 might_sleep();
754
755
756
757
758
759 disk = get_gendisk(bdev->bd_dev, &partno);
760 if (!disk)
761 return ERR_PTR(-ENXIO);
762
763 whole = bdget_disk(disk, 0);
764 module_put(disk->fops->owner);
765 put_disk(disk);
766 if (!whole)
767 return ERR_PTR(-ENOMEM);
768
769
770 spin_lock(&bdev_lock);
771
772 err = bd_prepare_to_claim(bdev, whole, holder);
773 if (err == 0) {
774 whole->bd_claiming = holder;
775 spin_unlock(&bdev_lock);
776 return whole;
777 } else {
778 spin_unlock(&bdev_lock);
779 bdput(whole);
780 return ERR_PTR(err);
781 }
782}
783
784#ifdef CONFIG_SYSFS
785struct bd_holder_disk {
786 struct list_head list;
787 struct gendisk *disk;
788 int refcnt;
789};
790
791static struct bd_holder_disk *bd_find_holder_disk(struct block_device *bdev,
792 struct gendisk *disk)
793{
794 struct bd_holder_disk *holder;
795
796 list_for_each_entry(holder, &bdev->bd_holder_disks, list)
797 if (holder->disk == disk)
798 return holder;
799 return NULL;
800}
801
/* Create a sysfs link in @from that points to @to and carries @to's name. */
static int add_symlink(struct kobject *from, struct kobject *to)
{
	const char *name = kobject_name(to);

	return sysfs_create_link(from, to, name);
}
806
/* Remove the sysfs link in @from that is named after @to. */
static void del_symlink(struct kobject *from, struct kobject *to)
{
	const char *name = kobject_name(to);

	sysfs_remove_link(from, name);
}
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk)
841{
842 struct bd_holder_disk *holder;
843 int ret = 0;
844
845 mutex_lock(&bdev->bd_mutex);
846
847 WARN_ON_ONCE(!bdev->bd_holder);
848
849
850 if (WARN_ON(!disk->slave_dir || !bdev->bd_part->holder_dir))
851 goto out_unlock;
852
853 holder = bd_find_holder_disk(bdev, disk);
854 if (holder) {
855 holder->refcnt++;
856 goto out_unlock;
857 }
858
859 holder = kzalloc(sizeof(*holder), GFP_KERNEL);
860 if (!holder) {
861 ret = -ENOMEM;
862 goto out_unlock;
863 }
864
865 INIT_LIST_HEAD(&holder->list);
866 holder->disk = disk;
867 holder->refcnt = 1;
868
869 ret = add_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj);
870 if (ret)
871 goto out_free;
872
873 ret = add_symlink(bdev->bd_part->holder_dir, &disk_to_dev(disk)->kobj);
874 if (ret)
875 goto out_del;
876
877
878
879
880 kobject_get(bdev->bd_part->holder_dir);
881
882 list_add(&holder->list, &bdev->bd_holder_disks);
883 goto out_unlock;
884
885out_del:
886 del_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj);
887out_free:
888 kfree(holder);
889out_unlock:
890 mutex_unlock(&bdev->bd_mutex);
891 return ret;
892}
893EXPORT_SYMBOL_GPL(bd_link_disk_holder);
894
895
896
897
898
899
900
901
902
903
904
905void bd_unlink_disk_holder(struct block_device *bdev, struct gendisk *disk)
906{
907 struct bd_holder_disk *holder;
908
909 mutex_lock(&bdev->bd_mutex);
910
911 holder = bd_find_holder_disk(bdev, disk);
912
913 if (!WARN_ON_ONCE(holder == NULL) && !--holder->refcnt) {
914 del_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj);
915 del_symlink(bdev->bd_part->holder_dir,
916 &disk_to_dev(disk)->kobj);
917 kobject_put(bdev->bd_part->holder_dir);
918 list_del_init(&holder->list);
919 kfree(holder);
920 }
921
922 mutex_unlock(&bdev->bd_mutex);
923}
924EXPORT_SYMBOL_GPL(bd_unlink_disk_holder);
925#endif
926
927
928
929
930
931
932
933
934
935
936
937static void flush_disk(struct block_device *bdev, bool kill_dirty)
938{
939 if (__invalidate_device(bdev, kill_dirty)) {
940 char name[BDEVNAME_SIZE] = "";
941
942 if (bdev->bd_disk)
943 disk_name(bdev->bd_disk, 0, name);
944 printk(KERN_WARNING "VFS: busy inodes on changed media or "
945 "resized disk %s\n", name);
946 }
947
948 if (!bdev->bd_disk)
949 return;
950 if (disk_partitionable(bdev->bd_disk))
951 bdev->bd_invalidated = 1;
952}
953
954
955
956
957
958
959
960
961
962void check_disk_size_change(struct gendisk *disk, struct block_device *bdev)
963{
964 loff_t disk_size, bdev_size;
965
966 disk_size = (loff_t)get_capacity(disk) << 9;
967 bdev_size = i_size_read(bdev->bd_inode);
968 if (disk_size != bdev_size) {
969 char name[BDEVNAME_SIZE];
970
971 disk_name(disk, 0, name);
972 printk(KERN_INFO
973 "%s: detected capacity change from %lld to %lld\n",
974 name, bdev_size, disk_size);
975 i_size_write(bdev->bd_inode, disk_size);
976 flush_disk(bdev, false);
977 }
978}
979EXPORT_SYMBOL(check_disk_size_change);
980
981
982
983
984
985
986
987
988
989int revalidate_disk(struct gendisk *disk)
990{
991 struct block_device *bdev;
992 int ret = 0;
993
994 if (disk->fops->revalidate_disk)
995 ret = disk->fops->revalidate_disk(disk);
996
997 bdev = bdget_disk(disk, 0);
998 if (!bdev)
999 return ret;
1000
1001 mutex_lock(&bdev->bd_mutex);
1002 check_disk_size_change(disk, bdev);
1003 mutex_unlock(&bdev->bd_mutex);
1004 bdput(bdev);
1005 return ret;
1006}
1007EXPORT_SYMBOL(revalidate_disk);
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018int check_disk_change(struct block_device *bdev)
1019{
1020 struct gendisk *disk = bdev->bd_disk;
1021 const struct block_device_operations *bdops = disk->fops;
1022 unsigned int events;
1023
1024 events = disk_clear_events(disk, DISK_EVENT_MEDIA_CHANGE |
1025 DISK_EVENT_EJECT_REQUEST);
1026 if (!(events & DISK_EVENT_MEDIA_CHANGE))
1027 return 0;
1028
1029 flush_disk(bdev, true);
1030 if (bdops->revalidate_disk)
1031 bdops->revalidate_disk(bdev->bd_disk);
1032 return 1;
1033}
1034
1035EXPORT_SYMBOL(check_disk_change);
1036
1037void bd_set_size(struct block_device *bdev, loff_t size)
1038{
1039 unsigned bsize = bdev_logical_block_size(bdev);
1040
1041 bdev->bd_inode->i_size = size;
1042 while (bsize < PAGE_CACHE_SIZE) {
1043 if (size & bsize)
1044 break;
1045 bsize <<= 1;
1046 }
1047 bdev->bd_block_size = bsize;
1048 bdev->bd_inode->i_blkbits = blksize_bits(bsize);
1049}
1050EXPORT_SYMBOL(bd_set_size);
1051
1052static int __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part);
1053
1054
1055
1056
1057
1058
1059
1060
1061static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
1062{
1063 struct gendisk *disk;
1064 int ret;
1065 int partno;
1066 int perm = 0;
1067
1068 if (mode & FMODE_READ)
1069 perm |= MAY_READ;
1070 if (mode & FMODE_WRITE)
1071 perm |= MAY_WRITE;
1072
1073
1074
1075 if (!for_part) {
1076 ret = devcgroup_inode_permission(bdev->bd_inode, perm);
1077 if (ret != 0) {
1078 bdput(bdev);
1079 return ret;
1080 }
1081 }
1082
1083 restart:
1084
1085 ret = -ENXIO;
1086 disk = get_gendisk(bdev->bd_dev, &partno);
1087 if (!disk)
1088 goto out;
1089
1090 mutex_lock_nested(&bdev->bd_mutex, for_part);
1091 if (!bdev->bd_openers) {
1092 bdev->bd_disk = disk;
1093 bdev->bd_contains = bdev;
1094 if (!partno) {
1095 struct backing_dev_info *bdi;
1096
1097 ret = -ENXIO;
1098 bdev->bd_part = disk_get_part(disk, partno);
1099 if (!bdev->bd_part)
1100 goto out_clear;
1101
1102 if (disk->fops->open) {
1103 ret = disk->fops->open(bdev, mode);
1104 if (ret == -ERESTARTSYS) {
1105
1106
1107
1108
1109 disk_put_part(bdev->bd_part);
1110 bdev->bd_part = NULL;
1111 module_put(disk->fops->owner);
1112 put_disk(disk);
1113 bdev->bd_disk = NULL;
1114 mutex_unlock(&bdev->bd_mutex);
1115 goto restart;
1116 }
1117 if (ret)
1118 goto out_clear;
1119 }
1120 if (!bdev->bd_openers) {
1121 bd_set_size(bdev,(loff_t)get_capacity(disk)<<9);
1122 bdi = blk_get_backing_dev_info(bdev);
1123 if (bdi == NULL)
1124 bdi = &default_backing_dev_info;
1125 bdev_inode_switch_bdi(bdev->bd_inode, bdi);
1126 }
1127 if (bdev->bd_invalidated)
1128 rescan_partitions(disk, bdev);
1129 } else {
1130 struct block_device *whole;
1131 whole = bdget_disk(disk, 0);
1132 ret = -ENOMEM;
1133 if (!whole)
1134 goto out_clear;
1135 BUG_ON(for_part);
1136 ret = __blkdev_get(whole, mode, 1);
1137 if (ret)
1138 goto out_clear;
1139 bdev->bd_contains = whole;
1140 bdev_inode_switch_bdi(bdev->bd_inode,
1141 whole->bd_inode->i_data.backing_dev_info);
1142 bdev->bd_part = disk_get_part(disk, partno);
1143 if (!(disk->flags & GENHD_FL_UP) ||
1144 !bdev->bd_part || !bdev->bd_part->nr_sects) {
1145 ret = -ENXIO;
1146 goto out_clear;
1147 }
1148 bd_set_size(bdev, (loff_t)bdev->bd_part->nr_sects << 9);
1149 }
1150 } else {
1151 module_put(disk->fops->owner);
1152 put_disk(disk);
1153 disk = NULL;
1154 if (bdev->bd_contains == bdev) {
1155 if (bdev->bd_disk->fops->open) {
1156 ret = bdev->bd_disk->fops->open(bdev, mode);
1157 if (ret)
1158 goto out_unlock_bdev;
1159 }
1160 if (bdev->bd_invalidated)
1161 rescan_partitions(bdev->bd_disk, bdev);
1162 }
1163 }
1164 bdev->bd_openers++;
1165 if (for_part)
1166 bdev->bd_part_count++;
1167 mutex_unlock(&bdev->bd_mutex);
1168 return 0;
1169
1170 out_clear:
1171 disk_put_part(bdev->bd_part);
1172 bdev->bd_disk = NULL;
1173 bdev->bd_part = NULL;
1174 bdev_inode_switch_bdi(bdev->bd_inode, &default_backing_dev_info);
1175 if (bdev != bdev->bd_contains)
1176 __blkdev_put(bdev->bd_contains, mode, 1);
1177 bdev->bd_contains = NULL;
1178 out_unlock_bdev:
1179 mutex_unlock(&bdev->bd_mutex);
1180 out:
1181 if (disk)
1182 module_put(disk->fops->owner);
1183 put_disk(disk);
1184 bdput(bdev);
1185
1186 return ret;
1187}
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder)
1209{
1210 struct block_device *whole = NULL;
1211 int res;
1212
1213 WARN_ON_ONCE((mode & FMODE_EXCL) && !holder);
1214
1215 if ((mode & FMODE_EXCL) && holder) {
1216 whole = bd_start_claiming(bdev, holder);
1217 if (IS_ERR(whole)) {
1218 bdput(bdev);
1219 return PTR_ERR(whole);
1220 }
1221 }
1222
1223 res = __blkdev_get(bdev, mode, 0);
1224
1225 if (whole) {
1226
1227 mutex_lock(&bdev->bd_mutex);
1228 spin_lock(&bdev_lock);
1229
1230 if (!res) {
1231 BUG_ON(!bd_may_claim(bdev, whole, holder));
1232
1233
1234
1235
1236
1237
1238 whole->bd_holders++;
1239 whole->bd_holder = bd_may_claim;
1240 bdev->bd_holders++;
1241 bdev->bd_holder = holder;
1242 }
1243
1244
1245 BUG_ON(whole->bd_claiming != holder);
1246 whole->bd_claiming = NULL;
1247 wake_up_bit(&whole->bd_claiming, 0);
1248
1249 spin_unlock(&bdev_lock);
1250
1251
1252
1253
1254
1255
1256
1257
1258 if (!res && (mode & FMODE_WRITE) && !bdev->bd_write_holder) {
1259 bdev->bd_write_holder = true;
1260 disk_block_events(bdev->bd_disk);
1261 }
1262
1263 mutex_unlock(&bdev->bd_mutex);
1264 bdput(whole);
1265 }
1266
1267 return res;
1268}
1269EXPORT_SYMBOL(blkdev_get);
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288struct block_device *blkdev_get_by_path(const char *path, fmode_t mode,
1289 void *holder)
1290{
1291 struct block_device *bdev;
1292 int err;
1293
1294 bdev = lookup_bdev(path);
1295 if (IS_ERR(bdev))
1296 return bdev;
1297
1298 err = blkdev_get(bdev, mode, holder);
1299 if (err)
1300 return ERR_PTR(err);
1301
1302 if ((mode & FMODE_WRITE) && bdev_read_only(bdev)) {
1303 blkdev_put(bdev, mode);
1304 return ERR_PTR(-EACCES);
1305 }
1306
1307 return bdev;
1308}
1309EXPORT_SYMBOL(blkdev_get_by_path);
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder)
1334{
1335 struct block_device *bdev;
1336 int err;
1337
1338 bdev = bdget(dev);
1339 if (!bdev)
1340 return ERR_PTR(-ENOMEM);
1341
1342 err = blkdev_get(bdev, mode, holder);
1343 if (err)
1344 return ERR_PTR(err);
1345
1346 return bdev;
1347}
1348EXPORT_SYMBOL(blkdev_get_by_dev);
1349
1350static int blkdev_open(struct inode * inode, struct file * filp)
1351{
1352 struct block_device *bdev;
1353
1354
1355
1356
1357
1358
1359
1360 filp->f_flags |= O_LARGEFILE;
1361
1362 if (filp->f_flags & O_NDELAY)
1363 filp->f_mode |= FMODE_NDELAY;
1364 if (filp->f_flags & O_EXCL)
1365 filp->f_mode |= FMODE_EXCL;
1366 if ((filp->f_flags & O_ACCMODE) == 3)
1367 filp->f_mode |= FMODE_WRITE_IOCTL;
1368
1369 bdev = bd_acquire(inode);
1370 if (bdev == NULL)
1371 return -ENOMEM;
1372
1373 filp->f_mapping = bdev->bd_inode->i_mapping;
1374
1375 return blkdev_get(bdev, filp->f_mode, filp);
1376}
1377
1378static int __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part)
1379{
1380 int ret = 0;
1381 struct gendisk *disk = bdev->bd_disk;
1382 struct block_device *victim = NULL;
1383
1384 mutex_lock_nested(&bdev->bd_mutex, for_part);
1385 if (for_part)
1386 bdev->bd_part_count--;
1387
1388 if (!--bdev->bd_openers) {
1389 WARN_ON_ONCE(bdev->bd_holders);
1390 sync_blockdev(bdev);
1391 kill_bdev(bdev);
1392 }
1393 if (bdev->bd_contains == bdev) {
1394 if (disk->fops->release)
1395 ret = disk->fops->release(disk, mode);
1396 }
1397 if (!bdev->bd_openers) {
1398 struct module *owner = disk->fops->owner;
1399
1400 put_disk(disk);
1401 module_put(owner);
1402 disk_put_part(bdev->bd_part);
1403 bdev->bd_part = NULL;
1404 bdev->bd_disk = NULL;
1405 bdev_inode_switch_bdi(bdev->bd_inode,
1406 &default_backing_dev_info);
1407 if (bdev != bdev->bd_contains)
1408 victim = bdev->bd_contains;
1409 bdev->bd_contains = NULL;
1410 }
1411 mutex_unlock(&bdev->bd_mutex);
1412 bdput(bdev);
1413 if (victim)
1414 __blkdev_put(victim, mode, 1);
1415 return ret;
1416}
1417
1418int blkdev_put(struct block_device *bdev, fmode_t mode)
1419{
1420 if (mode & FMODE_EXCL) {
1421 bool bdev_free;
1422
1423
1424
1425
1426
1427
1428 mutex_lock(&bdev->bd_mutex);
1429 spin_lock(&bdev_lock);
1430
1431 WARN_ON_ONCE(--bdev->bd_holders < 0);
1432 WARN_ON_ONCE(--bdev->bd_contains->bd_holders < 0);
1433
1434
1435 if ((bdev_free = !bdev->bd_holders))
1436 bdev->bd_holder = NULL;
1437 if (!bdev->bd_contains->bd_holders)
1438 bdev->bd_contains->bd_holder = NULL;
1439
1440 spin_unlock(&bdev_lock);
1441
1442
1443
1444
1445
1446 if (bdev_free) {
1447 if (bdev->bd_write_holder) {
1448 disk_unblock_events(bdev->bd_disk);
1449 bdev->bd_write_holder = false;
1450 } else
1451 disk_check_events(bdev->bd_disk);
1452 }
1453
1454 mutex_unlock(&bdev->bd_mutex);
1455 } else
1456 disk_check_events(bdev->bd_disk);
1457
1458 return __blkdev_put(bdev, mode, 0);
1459}
1460EXPORT_SYMBOL(blkdev_put);
1461
1462static int blkdev_close(struct inode * inode, struct file * filp)
1463{
1464 struct block_device *bdev = I_BDEV(filp->f_mapping->host);
1465
1466 return blkdev_put(bdev, filp->f_mode);
1467}
1468
1469static long block_ioctl(struct file *file, unsigned cmd, unsigned long arg)
1470{
1471 struct block_device *bdev = I_BDEV(file->f_mapping->host);
1472 fmode_t mode = file->f_mode;
1473
1474
1475
1476
1477
1478 if (file->f_flags & O_NDELAY)
1479 mode |= FMODE_NDELAY;
1480 else
1481 mode &= ~FMODE_NDELAY;
1482
1483 return blkdev_ioctl(bdev, mode, cmd, arg);
1484}
1485
1486
1487
1488
1489
1490
1491
1492
1493ssize_t blkdev_aio_write(struct kiocb *iocb, const struct iovec *iov,
1494 unsigned long nr_segs, loff_t pos)
1495{
1496 struct file *file = iocb->ki_filp;
1497 ssize_t ret;
1498
1499 BUG_ON(iocb->ki_pos != pos);
1500
1501 ret = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos);
1502 if (ret > 0 || ret == -EIOCBQUEUED) {
1503 ssize_t err;
1504
1505 err = generic_write_sync(file, pos, ret);
1506 if (err < 0 && ret > 0)
1507 ret = err;
1508 }
1509 return ret;
1510}
1511EXPORT_SYMBOL_GPL(blkdev_aio_write);
1512
1513
1514
1515
1516
1517static int blkdev_releasepage(struct page *page, gfp_t wait)
1518{
1519 struct super_block *super = BDEV_I(page->mapping->host)->bdev.bd_super;
1520
1521 if (super && super->s_op->bdev_try_to_free_page)
1522 return super->s_op->bdev_try_to_free_page(super, page, wait);
1523
1524 return try_to_free_buffers(page);
1525}
1526
1527static const struct address_space_operations def_blk_aops = {
1528 .readpage = blkdev_readpage,
1529 .writepage = blkdev_writepage,
1530 .sync_page = block_sync_page,
1531 .write_begin = blkdev_write_begin,
1532 .write_end = blkdev_write_end,
1533 .writepages = generic_writepages,
1534 .releasepage = blkdev_releasepage,
1535 .direct_IO = blkdev_direct_IO,
1536};
1537
1538const struct file_operations def_blk_fops = {
1539 .open = blkdev_open,
1540 .release = blkdev_close,
1541 .llseek = block_llseek,
1542 .read = do_sync_read,
1543 .write = do_sync_write,
1544 .aio_read = generic_file_aio_read,
1545 .aio_write = blkdev_aio_write,
1546 .mmap = generic_file_mmap,
1547 .fsync = blkdev_fsync,
1548 .unlocked_ioctl = block_ioctl,
1549#ifdef CONFIG_COMPAT
1550 .compat_ioctl = compat_blkdev_ioctl,
1551#endif
1552 .splice_read = generic_file_splice_read,
1553 .splice_write = generic_file_splice_write,
1554};
1555
1556int ioctl_by_bdev(struct block_device *bdev, unsigned cmd, unsigned long arg)
1557{
1558 int res;
1559 mm_segment_t old_fs = get_fs();
1560 set_fs(KERNEL_DS);
1561 res = blkdev_ioctl(bdev, 0, cmd, arg);
1562 set_fs(old_fs);
1563 return res;
1564}
1565
1566EXPORT_SYMBOL(ioctl_by_bdev);
1567
1568
1569
1570
1571
1572
1573
1574
1575
1576struct block_device *lookup_bdev(const char *pathname)
1577{
1578 struct block_device *bdev;
1579 struct inode *inode;
1580 struct path path;
1581 int error;
1582
1583 if (!pathname || !*pathname)
1584 return ERR_PTR(-EINVAL);
1585
1586 error = kern_path(pathname, LOOKUP_FOLLOW, &path);
1587 if (error)
1588 return ERR_PTR(error);
1589
1590 inode = path.dentry->d_inode;
1591 error = -ENOTBLK;
1592 if (!S_ISBLK(inode->i_mode))
1593 goto fail;
1594 error = -EACCES;
1595 if (path.mnt->mnt_flags & MNT_NODEV)
1596 goto fail;
1597 error = -ENOMEM;
1598 bdev = bd_acquire(inode);
1599 if (!bdev)
1600 goto fail;
1601out:
1602 path_put(&path);
1603 return bdev;
1604fail:
1605 bdev = ERR_PTR(error);
1606 goto out;
1607}
1608EXPORT_SYMBOL(lookup_bdev);
1609
1610int __invalidate_device(struct block_device *bdev, bool kill_dirty)
1611{
1612 struct super_block *sb = get_super(bdev);
1613 int res = 0;
1614
1615 if (sb) {
1616
1617
1618
1619
1620
1621
1622 shrink_dcache_sb(sb);
1623 res = invalidate_inodes(sb, kill_dirty);
1624 drop_super(sb);
1625 }
1626 invalidate_bdev(bdev);
1627 return res;
1628}
1629EXPORT_SYMBOL(__invalidate_device);
1630