1
2
3
4
5
6
7
8#include <linux/init.h>
9#include <linux/mm.h>
10#include <linux/fcntl.h>
11#include <linux/slab.h>
12#include <linux/kmod.h>
13#include <linux/major.h>
14#include <linux/device_cgroup.h>
15#include <linux/highmem.h>
16#include <linux/blkdev.h>
17#include <linux/module.h>
18#include <linux/blkpg.h>
19#include <linux/magic.h>
20#include <linux/buffer_head.h>
21#include <linux/swap.h>
22#include <linux/pagevec.h>
23#include <linux/writeback.h>
24#include <linux/mpage.h>
25#include <linux/mount.h>
26#include <linux/uio.h>
27#include <linux/namei.h>
28#include <linux/log2.h>
29#include <linux/cleancache.h>
30#include <asm/uaccess.h>
31#include "internal.h"
32
/*
 * Wrapper that stores a block_device next to the inode representing it.
 * BDEV_I() recovers the wrapper from the embedded inode with container_of().
 */
struct bdev_inode {
	struct block_device bdev;	/* block device state */
	struct inode vfs_inode;		/* inode handed out by bdev_alloc_inode() */
};
37
/* Forward declaration: bdget() needs the table before it is defined below. */
static const struct address_space_operations def_blk_aops;
39
40static inline struct bdev_inode *BDEV_I(struct inode *inode)
41{
42 return container_of(inode, struct bdev_inode, vfs_inode);
43}
44
45inline struct block_device *I_BDEV(struct inode *inode)
46{
47 return &BDEV_I(inode)->bdev;
48}
49EXPORT_SYMBOL(I_BDEV);
50
51static void bdev_write_inode(struct inode *inode)
52{
53 spin_lock(&inode->i_lock);
54 while (inode->i_state & I_DIRTY) {
55 spin_unlock(&inode->i_lock);
56 WARN_ON_ONCE(write_inode_now(inode, true));
57 spin_lock(&inode->i_lock);
58 }
59 spin_unlock(&inode->i_lock);
60}
61
62
63void kill_bdev(struct block_device *bdev)
64{
65 struct address_space *mapping = bdev->bd_inode->i_mapping;
66
67 if (mapping->nrpages == 0 && mapping->nrshadows == 0)
68 return;
69
70 invalidate_bh_lrus();
71 truncate_inode_pages(mapping, 0);
72}
73EXPORT_SYMBOL(kill_bdev);
74
75
76void invalidate_bdev(struct block_device *bdev)
77{
78 struct address_space *mapping = bdev->bd_inode->i_mapping;
79
80 if (mapping->nrpages == 0)
81 return;
82
83 invalidate_bh_lrus();
84 lru_add_drain_all();
85 invalidate_mapping_pages(mapping, 0, -1);
86
87
88
89 cleancache_invalidate_inode(mapping);
90}
91EXPORT_SYMBOL(invalidate_bdev);
92
93int set_blocksize(struct block_device *bdev, int size)
94{
95
96 if (size > PAGE_SIZE || size < 512 || !is_power_of_2(size))
97 return -EINVAL;
98
99
100 if (size < bdev_logical_block_size(bdev))
101 return -EINVAL;
102
103
104 if (bdev->bd_block_size != size) {
105 sync_blockdev(bdev);
106 bdev->bd_block_size = size;
107 bdev->bd_inode->i_blkbits = blksize_bits(size);
108 kill_bdev(bdev);
109 }
110 return 0;
111}
112
113EXPORT_SYMBOL(set_blocksize);
114
115int sb_set_blocksize(struct super_block *sb, int size)
116{
117 if (set_blocksize(sb->s_bdev, size))
118 return 0;
119
120
121 sb->s_blocksize = size;
122 sb->s_blocksize_bits = blksize_bits(size);
123 return sb->s_blocksize;
124}
125
126EXPORT_SYMBOL(sb_set_blocksize);
127
128int sb_min_blocksize(struct super_block *sb, int size)
129{
130 int minsize = bdev_logical_block_size(sb->s_bdev);
131 if (size < minsize)
132 size = minsize;
133 return sb_set_blocksize(sb, size);
134}
135
136EXPORT_SYMBOL(sb_min_blocksize);
137
138static int
139blkdev_get_block(struct inode *inode, sector_t iblock,
140 struct buffer_head *bh, int create)
141{
142 bh->b_bdev = I_BDEV(inode);
143 bh->b_blocknr = iblock;
144 set_buffer_mapped(bh);
145 return 0;
146}
147
148static ssize_t
149blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t offset)
150{
151 struct file *file = iocb->ki_filp;
152 struct inode *inode = file->f_mapping->host;
153
154 return __blockdev_direct_IO(iocb, inode, I_BDEV(inode), iter, offset,
155 blkdev_get_block, NULL, NULL,
156 DIO_SKIP_DIO_COUNT);
157}
158
159int __sync_blockdev(struct block_device *bdev, int wait)
160{
161 if (!bdev)
162 return 0;
163 if (!wait)
164 return filemap_flush(bdev->bd_inode->i_mapping);
165 return filemap_write_and_wait(bdev->bd_inode->i_mapping);
166}
167
168
169
170
171
/* Write out the page cache of @bdev and wait for the I/O to finish. */
int sync_blockdev(struct block_device *bdev)
{
	int ret = __sync_blockdev(bdev, 1);

	return ret;
}
EXPORT_SYMBOL(sync_blockdev);
177
178
179
180
181
182
/*
 * Sync @bdev.  When get_super() finds a superblock for the device the
 * whole filesystem is synced; otherwise only the block device itself.
 */
int fsync_bdev(struct block_device *bdev)
{
	struct super_block *sb = get_super(bdev);
	int res;

	if (!sb)
		return sync_blockdev(bdev);

	res = sync_filesystem(sb);
	drop_super(sb);
	return res;
}
EXPORT_SYMBOL(fsync_bdev);
194
195
196
197
198
199
200
201
202
203
204
205
206
207struct super_block *freeze_bdev(struct block_device *bdev)
208{
209 struct super_block *sb;
210 int error = 0;
211
212 mutex_lock(&bdev->bd_fsfreeze_mutex);
213 if (++bdev->bd_fsfreeze_count > 1) {
214
215
216
217
218
219 sb = get_super(bdev);
220 drop_super(sb);
221 mutex_unlock(&bdev->bd_fsfreeze_mutex);
222 return sb;
223 }
224
225 sb = get_active_super(bdev);
226 if (!sb)
227 goto out;
228 if (sb->s_op->freeze_super)
229 error = sb->s_op->freeze_super(sb);
230 else
231 error = freeze_super(sb);
232 if (error) {
233 deactivate_super(sb);
234 bdev->bd_fsfreeze_count--;
235 mutex_unlock(&bdev->bd_fsfreeze_mutex);
236 return ERR_PTR(error);
237 }
238 deactivate_super(sb);
239 out:
240 sync_blockdev(bdev);
241 mutex_unlock(&bdev->bd_fsfreeze_mutex);
242 return sb;
243}
244EXPORT_SYMBOL(freeze_bdev);
245
246
247
248
249
250
251
252
253int thaw_bdev(struct block_device *bdev, struct super_block *sb)
254{
255 int error = -EINVAL;
256
257 mutex_lock(&bdev->bd_fsfreeze_mutex);
258 if (!bdev->bd_fsfreeze_count)
259 goto out;
260
261 error = 0;
262 if (--bdev->bd_fsfreeze_count > 0)
263 goto out;
264
265 if (!sb)
266 goto out;
267
268 if (sb->s_op->thaw_super)
269 error = sb->s_op->thaw_super(sb);
270 else
271 error = thaw_super(sb);
272 if (error) {
273 bdev->bd_fsfreeze_count++;
274 mutex_unlock(&bdev->bd_fsfreeze_mutex);
275 return error;
276 }
277out:
278 mutex_unlock(&bdev->bd_fsfreeze_mutex);
279 return 0;
280}
281EXPORT_SYMBOL(thaw_bdev);
282
283static int blkdev_writepage(struct page *page, struct writeback_control *wbc)
284{
285 return block_write_full_page(page, blkdev_get_block, wbc);
286}
287
288static int blkdev_readpage(struct file * file, struct page * page)
289{
290 return block_read_full_page(page, blkdev_get_block);
291}
292
293static int blkdev_readpages(struct file *file, struct address_space *mapping,
294 struct list_head *pages, unsigned nr_pages)
295{
296 return mpage_readpages(mapping, pages, nr_pages, blkdev_get_block);
297}
298
299static int blkdev_write_begin(struct file *file, struct address_space *mapping,
300 loff_t pos, unsigned len, unsigned flags,
301 struct page **pagep, void **fsdata)
302{
303 return block_write_begin(mapping, pos, len, flags, pagep,
304 blkdev_get_block);
305}
306
307static int blkdev_write_end(struct file *file, struct address_space *mapping,
308 loff_t pos, unsigned len, unsigned copied,
309 struct page *page, void *fsdata)
310{
311 int ret;
312 ret = block_write_end(file, mapping, pos, len, copied, page, fsdata);
313
314 unlock_page(page);
315 page_cache_release(page);
316
317 return ret;
318}
319
320
321
322
323
324
325static loff_t block_llseek(struct file *file, loff_t offset, int whence)
326{
327 struct inode *bd_inode = file->f_mapping->host;
328 loff_t retval;
329
330 mutex_lock(&bd_inode->i_mutex);
331 retval = fixed_size_llseek(file, offset, whence, i_size_read(bd_inode));
332 mutex_unlock(&bd_inode->i_mutex);
333 return retval;
334}
335
336int blkdev_fsync(struct file *filp, loff_t start, loff_t end, int datasync)
337{
338 struct inode *bd_inode = filp->f_mapping->host;
339 struct block_device *bdev = I_BDEV(bd_inode);
340 int error;
341
342 error = filemap_write_and_wait_range(filp->f_mapping, start, end);
343 if (error)
344 return error;
345
346
347
348
349
350
351 error = blkdev_issue_flush(bdev, GFP_KERNEL, NULL);
352 if (error == -EOPNOTSUPP)
353 error = 0;
354
355 return error;
356}
357EXPORT_SYMBOL(blkdev_fsync);
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375int bdev_read_page(struct block_device *bdev, sector_t sector,
376 struct page *page)
377{
378 const struct block_device_operations *ops = bdev->bd_disk->fops;
379 if (!ops->rw_page)
380 return -EOPNOTSUPP;
381 return ops->rw_page(bdev, sector + get_start_sect(bdev), page, READ);
382}
383EXPORT_SYMBOL_GPL(bdev_read_page);
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404int bdev_write_page(struct block_device *bdev, sector_t sector,
405 struct page *page, struct writeback_control *wbc)
406{
407 int result;
408 int rw = (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : WRITE;
409 const struct block_device_operations *ops = bdev->bd_disk->fops;
410 if (!ops->rw_page)
411 return -EOPNOTSUPP;
412 set_page_writeback(page);
413 result = ops->rw_page(bdev, sector + get_start_sect(bdev), page, rw);
414 if (result)
415 end_page_writeback(page);
416 else
417 unlock_page(page);
418 return result;
419}
420EXPORT_SYMBOL_GPL(bdev_write_page);
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439long bdev_direct_access(struct block_device *bdev, sector_t sector,
440 void **addr, unsigned long *pfn, long size)
441{
442 long avail;
443 const struct block_device_operations *ops = bdev->bd_disk->fops;
444
445 if (size < 0)
446 return size;
447 if (!ops->direct_access)
448 return -EOPNOTSUPP;
449 if ((sector + DIV_ROUND_UP(size, 512)) >
450 part_nr_sects_read(bdev->bd_part))
451 return -ERANGE;
452 sector += get_start_sect(bdev);
453 if (sector % (PAGE_SIZE / 512))
454 return -EINVAL;
455 avail = ops->direct_access(bdev, sector, addr, pfn, size);
456 if (!avail)
457 return -ERANGE;
458 return min(avail, size);
459}
460EXPORT_SYMBOL_GPL(bdev_direct_access);
461
462
463
464
465
/*
 * bdev_lock: spinlock taken in this file around the list of block devices,
 * the inode <-> block_device association and the holder/claim fields.
 */
static  __cacheline_aligned_in_smp DEFINE_SPINLOCK(bdev_lock);
/* Slab cache from which struct bdev_inode objects are allocated. */
static struct kmem_cache * bdev_cachep __read_mostly;
468
469static struct inode *bdev_alloc_inode(struct super_block *sb)
470{
471 struct bdev_inode *ei = kmem_cache_alloc(bdev_cachep, GFP_KERNEL);
472 if (!ei)
473 return NULL;
474 return &ei->vfs_inode;
475}
476
477static void bdev_i_callback(struct rcu_head *head)
478{
479 struct inode *inode = container_of(head, struct inode, i_rcu);
480 struct bdev_inode *bdi = BDEV_I(inode);
481
482 kmem_cache_free(bdev_cachep, bdi);
483}
484
485static void bdev_destroy_inode(struct inode *inode)
486{
487 call_rcu(&inode->i_rcu, bdev_i_callback);
488}
489
490static void init_once(void *foo)
491{
492 struct bdev_inode *ei = (struct bdev_inode *) foo;
493 struct block_device *bdev = &ei->bdev;
494
495 memset(bdev, 0, sizeof(*bdev));
496 mutex_init(&bdev->bd_mutex);
497 INIT_LIST_HEAD(&bdev->bd_inodes);
498 INIT_LIST_HEAD(&bdev->bd_list);
499#ifdef CONFIG_SYSFS
500 INIT_LIST_HEAD(&bdev->bd_holder_disks);
501#endif
502 inode_init_once(&ei->vfs_inode);
503
504 mutex_init(&bdev->bd_fsfreeze_mutex);
505}
506
/*
 * Detach @inode from the block device it was associated with: unlink it
 * from the device's inode list, clear i_bdev and point i_mapping back at
 * the inode's own address space.  Both callers in this file hold bdev_lock.
 */
static inline void __bd_forget(struct inode *inode)
{
	list_del_init(&inode->i_devices);
	inode->i_bdev = NULL;
	inode->i_mapping = &inode->i_data;
}
513
514static void bdev_evict_inode(struct inode *inode)
515{
516 struct block_device *bdev = &BDEV_I(inode)->bdev;
517 struct list_head *p;
518 truncate_inode_pages_final(&inode->i_data);
519 invalidate_inode_buffers(inode);
520 clear_inode(inode);
521 spin_lock(&bdev_lock);
522 while ( (p = bdev->bd_inodes.next) != &bdev->bd_inodes ) {
523 __bd_forget(list_entry(p, struct inode, i_devices));
524 }
525 list_del_init(&bdev->bd_list);
526 spin_unlock(&bdev_lock);
527}
528
529static const struct super_operations bdev_sops = {
530 .statfs = simple_statfs,
531 .alloc_inode = bdev_alloc_inode,
532 .destroy_inode = bdev_destroy_inode,
533 .drop_inode = generic_delete_inode,
534 .evict_inode = bdev_evict_inode,
535};
536
537static struct dentry *bd_mount(struct file_system_type *fs_type,
538 int flags, const char *dev_name, void *data)
539{
540 return mount_pseudo(fs_type, "bdev:", &bdev_sops, NULL, BDEVFS_MAGIC);
541}
542
/* Filesystem type of the internal "bdev" pseudo filesystem. */
static struct file_system_type bd_type = {
	.name		= "bdev",
	.mount		= bd_mount,
	.kill_sb	= kill_anon_super,
};
548
/* Superblock of the mounted bdev pseudo filesystem; set by bdev_cache_init(). */
static struct super_block *blockdev_superblock __read_mostly;
550
551void __init bdev_cache_init(void)
552{
553 int err;
554 static struct vfsmount *bd_mnt;
555
556 bdev_cachep = kmem_cache_create("bdev_cache", sizeof(struct bdev_inode),
557 0, (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|
558 SLAB_MEM_SPREAD|SLAB_PANIC),
559 init_once);
560 err = register_filesystem(&bd_type);
561 if (err)
562 panic("Cannot register bdev pseudo-fs");
563 bd_mnt = kern_mount(&bd_type);
564 if (IS_ERR(bd_mnt))
565 panic("Cannot create bdev pseudo-fs");
566 blockdev_superblock = bd_mnt->mnt_sb;
567}
568
569
570
571
572
573
574static inline unsigned long hash(dev_t dev)
575{
576 return MAJOR(dev)+MINOR(dev);
577}
578
579static int bdev_test(struct inode *inode, void *data)
580{
581 return BDEV_I(inode)->bdev.bd_dev == *(dev_t *)data;
582}
583
584static int bdev_set(struct inode *inode, void *data)
585{
586 BDEV_I(inode)->bdev.bd_dev = *(dev_t *)data;
587 return 0;
588}
589
/* List of block devices, linked through bd_list; bdget() adds entries under bdev_lock. */
static LIST_HEAD(all_bdevs);
591
592struct block_device *bdget(dev_t dev)
593{
594 struct block_device *bdev;
595 struct inode *inode;
596
597 inode = iget5_locked(blockdev_superblock, hash(dev),
598 bdev_test, bdev_set, &dev);
599
600 if (!inode)
601 return NULL;
602
603 bdev = &BDEV_I(inode)->bdev;
604
605 if (inode->i_state & I_NEW) {
606 bdev->bd_contains = NULL;
607 bdev->bd_super = NULL;
608 bdev->bd_inode = inode;
609 bdev->bd_block_size = (1 << inode->i_blkbits);
610 bdev->bd_part_count = 0;
611 bdev->bd_invalidated = 0;
612 inode->i_mode = S_IFBLK;
613 inode->i_rdev = dev;
614 inode->i_bdev = bdev;
615 inode->i_data.a_ops = &def_blk_aops;
616 mapping_set_gfp_mask(&inode->i_data, GFP_USER);
617 spin_lock(&bdev_lock);
618 list_add(&bdev->bd_list, &all_bdevs);
619 spin_unlock(&bdev_lock);
620 unlock_new_inode(inode);
621 }
622 return bdev;
623}
624
625EXPORT_SYMBOL(bdget);
626
627
628
629
630
631struct block_device *bdgrab(struct block_device *bdev)
632{
633 ihold(bdev->bd_inode);
634 return bdev;
635}
636EXPORT_SYMBOL(bdgrab);
637
638long nr_blockdev_pages(void)
639{
640 struct block_device *bdev;
641 long ret = 0;
642 spin_lock(&bdev_lock);
643 list_for_each_entry(bdev, &all_bdevs, bd_list) {
644 ret += bdev->bd_inode->i_mapping->nrpages;
645 }
646 spin_unlock(&bdev_lock);
647 return ret;
648}
649
650void bdput(struct block_device *bdev)
651{
652 iput(bdev->bd_inode);
653}
654
655EXPORT_SYMBOL(bdput);
656
657static struct block_device *bd_acquire(struct inode *inode)
658{
659 struct block_device *bdev;
660
661 spin_lock(&bdev_lock);
662 bdev = inode->i_bdev;
663 if (bdev) {
664 ihold(bdev->bd_inode);
665 spin_unlock(&bdev_lock);
666 return bdev;
667 }
668 spin_unlock(&bdev_lock);
669
670 bdev = bdget(inode->i_rdev);
671 if (bdev) {
672 spin_lock(&bdev_lock);
673 if (!inode->i_bdev) {
674
675
676
677
678
679
680 ihold(bdev->bd_inode);
681 inode->i_bdev = bdev;
682 inode->i_mapping = bdev->bd_inode->i_mapping;
683 list_add(&inode->i_devices, &bdev->bd_inodes);
684 }
685 spin_unlock(&bdev_lock);
686 }
687 return bdev;
688}
689
690int sb_is_blkdev_sb(struct super_block *sb)
691{
692 return sb == blockdev_superblock;
693}
694
695
696
697void bd_forget(struct inode *inode)
698{
699 struct block_device *bdev = NULL;
700
701 spin_lock(&bdev_lock);
702 if (!sb_is_blkdev_sb(inode->i_sb))
703 bdev = inode->i_bdev;
704 __bd_forget(inode);
705 spin_unlock(&bdev_lock);
706
707 if (bdev)
708 iput(bdev->bd_inode);
709}
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725static bool bd_may_claim(struct block_device *bdev, struct block_device *whole,
726 void *holder)
727{
728 if (bdev->bd_holder == holder)
729 return true;
730 else if (bdev->bd_holder != NULL)
731 return false;
732 else if (bdev->bd_contains == bdev)
733 return true;
734
735 else if (whole->bd_holder == bd_may_claim)
736 return true;
737 else if (whole->bd_holder != NULL)
738 return false;
739 else
740 return true;
741}
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761static int bd_prepare_to_claim(struct block_device *bdev,
762 struct block_device *whole, void *holder)
763{
764retry:
765
766 if (!bd_may_claim(bdev, whole, holder))
767 return -EBUSY;
768
769
770 if (whole->bd_claiming) {
771 wait_queue_head_t *wq = bit_waitqueue(&whole->bd_claiming, 0);
772 DEFINE_WAIT(wait);
773
774 prepare_to_wait(wq, &wait, TASK_UNINTERRUPTIBLE);
775 spin_unlock(&bdev_lock);
776 schedule();
777 finish_wait(wq, &wait);
778 spin_lock(&bdev_lock);
779 goto retry;
780 }
781
782
783 return 0;
784}
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809static struct block_device *bd_start_claiming(struct block_device *bdev,
810 void *holder)
811{
812 struct gendisk *disk;
813 struct block_device *whole;
814 int partno, err;
815
816 might_sleep();
817
818
819
820
821
822 disk = get_gendisk(bdev->bd_dev, &partno);
823 if (!disk)
824 return ERR_PTR(-ENXIO);
825
826
827
828
829
830
831
832
833
834 if (partno)
835 whole = bdget_disk(disk, 0);
836 else
837 whole = bdgrab(bdev);
838
839 module_put(disk->fops->owner);
840 put_disk(disk);
841 if (!whole)
842 return ERR_PTR(-ENOMEM);
843
844
845 spin_lock(&bdev_lock);
846
847 err = bd_prepare_to_claim(bdev, whole, holder);
848 if (err == 0) {
849 whole->bd_claiming = holder;
850 spin_unlock(&bdev_lock);
851 return whole;
852 } else {
853 spin_unlock(&bdev_lock);
854 bdput(whole);
855 return ERR_PTR(err);
856 }
857}
858
859#ifdef CONFIG_SYSFS
/* One entry per gendisk linked as a holder of a block device. */
struct bd_holder_disk {
	struct list_head	list;	/* link in bdev->bd_holder_disks */
	struct gendisk		*disk;	/* the holder disk */
	int			refcnt;	/* number of bd_link_disk_holder() calls */
};
865
866static struct bd_holder_disk *bd_find_holder_disk(struct block_device *bdev,
867 struct gendisk *disk)
868{
869 struct bd_holder_disk *holder;
870
871 list_for_each_entry(holder, &bdev->bd_holder_disks, list)
872 if (holder->disk == disk)
873 return holder;
874 return NULL;
875}
876
/* Create a sysfs link inside @from that points at @to and carries @to's name. */
static int add_symlink(struct kobject *from, struct kobject *to)
{
	const char *link_name = kobject_name(to);

	return sysfs_create_link(from, to, link_name);
}
881
/* Remove the sysfs link inside @from that is named after @to. */
static void del_symlink(struct kobject *from, struct kobject *to)
{
	const char *link_name = kobject_name(to);

	sysfs_remove_link(from, link_name);
}
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk)
916{
917 struct bd_holder_disk *holder;
918 int ret = 0;
919
920 mutex_lock(&bdev->bd_mutex);
921
922 WARN_ON_ONCE(!bdev->bd_holder);
923
924
925 if (WARN_ON(!disk->slave_dir || !bdev->bd_part->holder_dir))
926 goto out_unlock;
927
928 holder = bd_find_holder_disk(bdev, disk);
929 if (holder) {
930 holder->refcnt++;
931 goto out_unlock;
932 }
933
934 holder = kzalloc(sizeof(*holder), GFP_KERNEL);
935 if (!holder) {
936 ret = -ENOMEM;
937 goto out_unlock;
938 }
939
940 INIT_LIST_HEAD(&holder->list);
941 holder->disk = disk;
942 holder->refcnt = 1;
943
944 ret = add_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj);
945 if (ret)
946 goto out_free;
947
948 ret = add_symlink(bdev->bd_part->holder_dir, &disk_to_dev(disk)->kobj);
949 if (ret)
950 goto out_del;
951
952
953
954
955 kobject_get(bdev->bd_part->holder_dir);
956
957 list_add(&holder->list, &bdev->bd_holder_disks);
958 goto out_unlock;
959
960out_del:
961 del_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj);
962out_free:
963 kfree(holder);
964out_unlock:
965 mutex_unlock(&bdev->bd_mutex);
966 return ret;
967}
968EXPORT_SYMBOL_GPL(bd_link_disk_holder);
969
970
971
972
973
974
975
976
977
978
979
980void bd_unlink_disk_holder(struct block_device *bdev, struct gendisk *disk)
981{
982 struct bd_holder_disk *holder;
983
984 mutex_lock(&bdev->bd_mutex);
985
986 holder = bd_find_holder_disk(bdev, disk);
987
988 if (!WARN_ON_ONCE(holder == NULL) && !--holder->refcnt) {
989 del_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj);
990 del_symlink(bdev->bd_part->holder_dir,
991 &disk_to_dev(disk)->kobj);
992 kobject_put(bdev->bd_part->holder_dir);
993 list_del_init(&holder->list);
994 kfree(holder);
995 }
996
997 mutex_unlock(&bdev->bd_mutex);
998}
999EXPORT_SYMBOL_GPL(bd_unlink_disk_holder);
1000#endif
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012static void flush_disk(struct block_device *bdev, bool kill_dirty)
1013{
1014 if (__invalidate_device(bdev, kill_dirty)) {
1015 char name[BDEVNAME_SIZE] = "";
1016
1017 if (bdev->bd_disk)
1018 disk_name(bdev->bd_disk, 0, name);
1019 printk(KERN_WARNING "VFS: busy inodes on changed media or "
1020 "resized disk %s\n", name);
1021 }
1022
1023 if (!bdev->bd_disk)
1024 return;
1025 if (disk_part_scan_enabled(bdev->bd_disk))
1026 bdev->bd_invalidated = 1;
1027}
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037void check_disk_size_change(struct gendisk *disk, struct block_device *bdev)
1038{
1039 loff_t disk_size, bdev_size;
1040
1041 disk_size = (loff_t)get_capacity(disk) << 9;
1042 bdev_size = i_size_read(bdev->bd_inode);
1043 if (disk_size != bdev_size) {
1044 char name[BDEVNAME_SIZE];
1045
1046 disk_name(disk, 0, name);
1047 printk(KERN_INFO
1048 "%s: detected capacity change from %lld to %lld\n",
1049 name, bdev_size, disk_size);
1050 i_size_write(bdev->bd_inode, disk_size);
1051 flush_disk(bdev, false);
1052 }
1053}
1054EXPORT_SYMBOL(check_disk_size_change);
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064int revalidate_disk(struct gendisk *disk)
1065{
1066 struct block_device *bdev;
1067 int ret = 0;
1068
1069 if (disk->fops->revalidate_disk)
1070 ret = disk->fops->revalidate_disk(disk);
1071
1072 bdev = bdget_disk(disk, 0);
1073 if (!bdev)
1074 return ret;
1075
1076 mutex_lock(&bdev->bd_mutex);
1077 check_disk_size_change(disk, bdev);
1078 bdev->bd_invalidated = 0;
1079 mutex_unlock(&bdev->bd_mutex);
1080 bdput(bdev);
1081 return ret;
1082}
1083EXPORT_SYMBOL(revalidate_disk);
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094int check_disk_change(struct block_device *bdev)
1095{
1096 struct gendisk *disk = bdev->bd_disk;
1097 const struct block_device_operations *bdops = disk->fops;
1098 unsigned int events;
1099
1100 events = disk_clear_events(disk, DISK_EVENT_MEDIA_CHANGE |
1101 DISK_EVENT_EJECT_REQUEST);
1102 if (!(events & DISK_EVENT_MEDIA_CHANGE))
1103 return 0;
1104
1105 flush_disk(bdev, true);
1106 if (bdops->revalidate_disk)
1107 bdops->revalidate_disk(bdev->bd_disk);
1108 return 1;
1109}
1110
1111EXPORT_SYMBOL(check_disk_change);
1112
1113void bd_set_size(struct block_device *bdev, loff_t size)
1114{
1115 unsigned bsize = bdev_logical_block_size(bdev);
1116
1117 mutex_lock(&bdev->bd_inode->i_mutex);
1118 i_size_write(bdev->bd_inode, size);
1119 mutex_unlock(&bdev->bd_inode->i_mutex);
1120 while (bsize < PAGE_CACHE_SIZE) {
1121 if (size & bsize)
1122 break;
1123 bsize <<= 1;
1124 }
1125 bdev->bd_block_size = bsize;
1126 bdev->bd_inode->i_blkbits = blksize_bits(bsize);
1127}
1128EXPORT_SYMBOL(bd_set_size);
1129
/* Forward declaration: __blkdev_get() uses it on its error path. */
static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part);
1131
1132
1133
1134
1135
1136
1137
1138
/*
 * __blkdev_get - open a block device
 * @bdev:     block device to open; the caller's reference is consumed on
 *            failure (every error path ends in bdput())
 * @mode:     FMODE_* flags of the open
 * @for_part: non-zero when the whole device is opened on behalf of one of
 *            its partitions; also used as the bd_mutex lockdep subclass
 *
 * Returns 0 on success or a negative errno.
 */
static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
{
	struct gendisk *disk;
	struct module *owner;
	int ret;
	int partno;
	int perm = 0;

	/* Translate the open mode into a permission mask. */
	if (mode & FMODE_READ)
		perm |= MAY_READ;
	if (mode & FMODE_WRITE)
		perm |= MAY_WRITE;

	/* The device cgroup check is skipped for the internal open of a
	 * whole device on behalf of a partition. */
	if (!for_part) {
		ret = devcgroup_inode_permission(bdev->bd_inode, perm);
		if (ret != 0) {
			bdput(bdev);
			return ret;
		}
	}

 restart:

	ret = -ENXIO;
	disk = get_gendisk(bdev->bd_dev, &partno);
	if (!disk)
		goto out;
	owner = disk->fops->owner;

	/* Events stay blocked until the matching disk_unblock_events(). */
	disk_block_events(disk);
	mutex_lock_nested(&bdev->bd_mutex, for_part);
	if (!bdev->bd_openers) {
		/* First opener: wire the bdev up to its disk. */
		bdev->bd_disk = disk;
		bdev->bd_queue = disk->queue;
		bdev->bd_contains = bdev;
		if (!partno) {
			/* Whole device. */
			ret = -ENXIO;
			bdev->bd_part = disk_get_part(disk, partno);
			if (!bdev->bd_part)
				goto out_clear;

			ret = 0;
			if (disk->fops->open) {
				ret = disk->fops->open(bdev, mode);
				if (ret == -ERESTARTSYS) {
					/*
					 * The driver asked for a restart:
					 * undo everything done so far, drop
					 * the disk and module references and
					 * retry from the gendisk lookup.
					 */
					disk_put_part(bdev->bd_part);
					bdev->bd_part = NULL;
					bdev->bd_disk = NULL;
					bdev->bd_queue = NULL;
					mutex_unlock(&bdev->bd_mutex);
					disk_unblock_events(disk);
					put_disk(disk);
					module_put(owner);
					goto restart;
				}
			}

			if (!ret)
				bd_set_size(bdev,(loff_t)get_capacity(disk)<<9);

			/*
			 * Partition handling runs even when ->open() failed:
			 * rescan on success, drop the partitions when the
			 * driver reported -ENOMEDIUM.
			 */
			if (bdev->bd_invalidated) {
				if (!ret)
					rescan_partitions(disk, bdev);
				else if (ret == -ENOMEDIUM)
					invalidate_partitions(disk, bdev);
			}
			if (ret)
				goto out_clear;
		} else {
			/* Partition: open the containing whole device first. */
			struct block_device *whole;
			whole = bdget_disk(disk, 0);
			ret = -ENOMEM;
			if (!whole)
				goto out_clear;
			BUG_ON(for_part);
			ret = __blkdev_get(whole, mode, 1);
			if (ret)
				goto out_clear;
			bdev->bd_contains = whole;
			bdev->bd_part = disk_get_part(disk, partno);
			/* Refuse disks that are not up and missing/empty partitions. */
			if (!(disk->flags & GENHD_FL_UP) ||
			    !bdev->bd_part || !bdev->bd_part->nr_sects) {
				ret = -ENXIO;
				goto out_clear;
			}
			bd_set_size(bdev, (loff_t)bdev->bd_part->nr_sects << 9);
		}
	} else {
		/* Already open: whole devices still get ->open() called. */
		if (bdev->bd_contains == bdev) {
			ret = 0;
			if (bdev->bd_disk->fops->open)
				ret = bdev->bd_disk->fops->open(bdev, mode);
			/* Same partition handling as for the first opener. */
			if (bdev->bd_invalidated) {
				if (!ret)
					rescan_partitions(bdev->bd_disk, bdev);
				else if (ret == -ENOMEDIUM)
					invalidate_partitions(bdev->bd_disk, bdev);
			}
			if (ret)
				goto out_unlock_bdev;
		}
		/*
		 * Drop the disk and module references belonging to this call
		 * (presumably taken by get_gendisk() above); the first opener
		 * keeps its own until __blkdev_put().
		 */
		put_disk(disk);
		module_put(owner);
	}
	bdev->bd_openers++;
	if (for_part)
		bdev->bd_part_count++;
	mutex_unlock(&bdev->bd_mutex);
	disk_unblock_events(disk);
	return 0;

 out_clear:
	/* Undo the first-opener setup, including the open of the whole device. */
	disk_put_part(bdev->bd_part);
	bdev->bd_disk = NULL;
	bdev->bd_part = NULL;
	bdev->bd_queue = NULL;
	if (bdev != bdev->bd_contains)
		__blkdev_put(bdev->bd_contains, mode, 1);
	bdev->bd_contains = NULL;
 out_unlock_bdev:
	mutex_unlock(&bdev->bd_mutex);
	disk_unblock_events(disk);
	put_disk(disk);
	module_put(owner);
 out:
	bdput(bdev);

	return ret;
}
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
/*
 * blkdev_get - open a block device, optionally claiming it exclusively
 * @bdev:   block device to open; its reference is consumed on failure
 * @mode:   FMODE_* flags; FMODE_EXCL requests an exclusive claim
 * @holder: identity of the exclusive holder; expected to be non-NULL when
 *          FMODE_EXCL is set (a one-time warning is issued otherwise)
 *
 * Returns 0 on success or a negative errno.
 */
int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder)
{
	struct block_device *whole = NULL;
	int res;

	WARN_ON_ONCE((mode & FMODE_EXCL) && !holder);

	/* Exclusive open: start the claim before actually opening. */
	if ((mode & FMODE_EXCL) && holder) {
		whole = bd_start_claiming(bdev, holder);
		if (IS_ERR(whole)) {
			bdput(bdev);
			return PTR_ERR(whole);
		}
	}

	res = __blkdev_get(bdev, mode, 0);

	if (whole) {
		struct gendisk *disk = whole->bd_disk;

		/* Finish (or abort, when the open failed) the claim. */
		mutex_lock(&bdev->bd_mutex);
		spin_lock(&bdev_lock);

		if (!res) {
			BUG_ON(!bd_may_claim(bdev, whole, holder));
			/*
			 * Record the holder on the device itself and mark the
			 * whole device with the bd_may_claim sentinel, which
			 * bd_may_claim() tests for.  When @bdev is the whole
			 * device the last assignment leaves @holder in place.
			 */
			whole->bd_holders++;
			whole->bd_holder = bd_may_claim;
			bdev->bd_holders++;
			bdev->bd_holder = holder;
		}

		/* The claim attempt is over: wake anyone waiting on it. */
		BUG_ON(whole->bd_claiming != holder);
		whole->bd_claiming = NULL;
		wake_up_bit(&whole->bd_claiming, 0);

		spin_unlock(&bdev_lock);

		/*
		 * For disks flagged GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE, the
		 * first successful exclusive writer blocks disk events;
		 * blkdev_put() unblocks them again.
		 */
		if (!res && (mode & FMODE_WRITE) && !bdev->bd_write_holder &&
		    (disk->flags & GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE)) {
			bdev->bd_write_holder = true;
			disk_block_events(disk);
		}

		mutex_unlock(&bdev->bd_mutex);
		/* Drop the reference obtained by bd_start_claiming(). */
		bdput(whole);
	}

	return res;
}
EXPORT_SYMBOL(blkdev_get);
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385struct block_device *blkdev_get_by_path(const char *path, fmode_t mode,
1386 void *holder)
1387{
1388 struct block_device *bdev;
1389 int err;
1390
1391 bdev = lookup_bdev(path);
1392 if (IS_ERR(bdev))
1393 return bdev;
1394
1395 err = blkdev_get(bdev, mode, holder);
1396 if (err)
1397 return ERR_PTR(err);
1398
1399 if ((mode & FMODE_WRITE) && bdev_read_only(bdev)) {
1400 blkdev_put(bdev, mode);
1401 return ERR_PTR(-EACCES);
1402 }
1403
1404 return bdev;
1405}
1406EXPORT_SYMBOL(blkdev_get_by_path);
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder)
1431{
1432 struct block_device *bdev;
1433 int err;
1434
1435 bdev = bdget(dev);
1436 if (!bdev)
1437 return ERR_PTR(-ENOMEM);
1438
1439 err = blkdev_get(bdev, mode, holder);
1440 if (err)
1441 return ERR_PTR(err);
1442
1443 return bdev;
1444}
1445EXPORT_SYMBOL(blkdev_get_by_dev);
1446
1447static int blkdev_open(struct inode * inode, struct file * filp)
1448{
1449 struct block_device *bdev;
1450
1451
1452
1453
1454
1455
1456
1457 filp->f_flags |= O_LARGEFILE;
1458
1459 if (filp->f_flags & O_NDELAY)
1460 filp->f_mode |= FMODE_NDELAY;
1461 if (filp->f_flags & O_EXCL)
1462 filp->f_mode |= FMODE_EXCL;
1463 if ((filp->f_flags & O_ACCMODE) == 3)
1464 filp->f_mode |= FMODE_WRITE_IOCTL;
1465
1466 bdev = bd_acquire(inode);
1467 if (bdev == NULL)
1468 return -ENOMEM;
1469
1470 filp->f_mapping = bdev->bd_inode->i_mapping;
1471
1472 return blkdev_get(bdev, filp->f_mode, filp);
1473}
1474
/*
 * __blkdev_put - drop one open of a block device
 * @bdev:     block device being closed; one reference is dropped
 * @mode:     FMODE_* flags the device was opened with
 * @for_part: non-zero when closing the whole device on behalf of one of
 *            its partitions; also used as the bd_mutex lockdep subclass
 */
static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part)
{
	struct gendisk *disk = bdev->bd_disk;
	struct block_device *victim = NULL;

	mutex_lock_nested(&bdev->bd_mutex, for_part);
	if (for_part)
		bdev->bd_part_count--;

	if (!--bdev->bd_openers) {
		/* Last closer: write everything out and drop the cache. */
		WARN_ON_ONCE(bdev->bd_holders);
		sync_blockdev(bdev);
		kill_bdev(bdev);
		/* The inode is written back before ->release() runs below. */
		bdev_write_inode(bdev->bd_inode);
	}
	/* ->release() is only called for whole devices. */
	if (bdev->bd_contains == bdev) {
		if (disk->fops->release)
			disk->fops->release(disk, mode);
	}
	if (!bdev->bd_openers) {
		struct module *owner = disk->fops->owner;

		disk_put_part(bdev->bd_part);
		bdev->bd_part = NULL;
		bdev->bd_disk = NULL;
		/* A partition also has to close its containing device. */
		if (bdev != bdev->bd_contains)
			victim = bdev->bd_contains;
		bdev->bd_contains = NULL;

		put_disk(disk);
		module_put(owner);
	}
	mutex_unlock(&bdev->bd_mutex);
	bdput(bdev);
	/* Done after unlocking; the recursive call takes the whole device's mutex. */
	if (victim)
		__blkdev_put(victim, mode, 1);
}
1516
/*
 * blkdev_put - close a block device opened with blkdev_get()
 * @bdev: block device to close
 * @mode: FMODE_* flags the device was opened with; FMODE_EXCL also
 *        releases the exclusive claim
 */
void blkdev_put(struct block_device *bdev, fmode_t mode)
{
	mutex_lock(&bdev->bd_mutex);

	if (mode & FMODE_EXCL) {
		bool bdev_free;

		/*
		 * Release the claim.  The holder counts are modified under
		 * bdev_lock, like every other holder update in this file.
		 */
		spin_lock(&bdev_lock);

		WARN_ON_ONCE(--bdev->bd_holders < 0);
		WARN_ON_ONCE(--bdev->bd_contains->bd_holders < 0);

		/* Clear the holder pointers once the counts reach zero. */
		if ((bdev_free = !bdev->bd_holders))
			bdev->bd_holder = NULL;
		if (!bdev->bd_contains->bd_holders)
			bdev->bd_contains->bd_holder = NULL;

		spin_unlock(&bdev_lock);

		/*
		 * The last exclusive holder is gone: undo the event blocking
		 * done by blkdev_get() for an exclusive writer.
		 */
		if (bdev_free && bdev->bd_write_holder) {
			disk_unblock_events(bdev->bd_disk);
			bdev->bd_write_holder = false;
		}
	}

	/* Flush pending media-change events before the device is closed. */
	disk_flush_events(bdev->bd_disk, DISK_EVENT_MEDIA_CHANGE);

	mutex_unlock(&bdev->bd_mutex);

	__blkdev_put(bdev, mode, 0);
}
EXPORT_SYMBOL(blkdev_put);
1564
1565static int blkdev_close(struct inode * inode, struct file * filp)
1566{
1567 struct block_device *bdev = I_BDEV(filp->f_mapping->host);
1568 blkdev_put(bdev, filp->f_mode);
1569 return 0;
1570}
1571
1572static long block_ioctl(struct file *file, unsigned cmd, unsigned long arg)
1573{
1574 struct block_device *bdev = I_BDEV(file->f_mapping->host);
1575 fmode_t mode = file->f_mode;
1576
1577
1578
1579
1580
1581 if (file->f_flags & O_NDELAY)
1582 mode |= FMODE_NDELAY;
1583 else
1584 mode &= ~FMODE_NDELAY;
1585
1586 return blkdev_ioctl(bdev, mode, cmd, arg);
1587}
1588
1589
1590
1591
1592
1593
1594
1595
1596ssize_t blkdev_write_iter(struct kiocb *iocb, struct iov_iter *from)
1597{
1598 struct file *file = iocb->ki_filp;
1599 struct inode *bd_inode = file->f_mapping->host;
1600 loff_t size = i_size_read(bd_inode);
1601 struct blk_plug plug;
1602 ssize_t ret;
1603
1604 if (bdev_read_only(I_BDEV(bd_inode)))
1605 return -EPERM;
1606
1607 if (!iov_iter_count(from))
1608 return 0;
1609
1610 if (iocb->ki_pos >= size)
1611 return -ENOSPC;
1612
1613 iov_iter_truncate(from, size - iocb->ki_pos);
1614
1615 blk_start_plug(&plug);
1616 ret = __generic_file_write_iter(iocb, from);
1617 if (ret > 0) {
1618 ssize_t err;
1619 err = generic_write_sync(file, iocb->ki_pos - ret, ret);
1620 if (err < 0)
1621 ret = err;
1622 }
1623 blk_finish_plug(&plug);
1624 return ret;
1625}
1626EXPORT_SYMBOL_GPL(blkdev_write_iter);
1627
1628ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to)
1629{
1630 struct file *file = iocb->ki_filp;
1631 struct inode *bd_inode = file->f_mapping->host;
1632 loff_t size = i_size_read(bd_inode);
1633 loff_t pos = iocb->ki_pos;
1634
1635 if (pos >= size)
1636 return 0;
1637
1638 size -= pos;
1639 iov_iter_truncate(to, size);
1640 return generic_file_read_iter(iocb, to);
1641}
1642EXPORT_SYMBOL_GPL(blkdev_read_iter);
1643
1644
1645
1646
1647
1648static int blkdev_releasepage(struct page *page, gfp_t wait)
1649{
1650 struct super_block *super = BDEV_I(page->mapping->host)->bdev.bd_super;
1651
1652 if (super && super->s_op->bdev_try_to_free_page)
1653 return super->s_op->bdev_try_to_free_page(super, page, wait);
1654
1655 return try_to_free_buffers(page);
1656}
1657
1658static const struct address_space_operations def_blk_aops = {
1659 .readpage = blkdev_readpage,
1660 .readpages = blkdev_readpages,
1661 .writepage = blkdev_writepage,
1662 .write_begin = blkdev_write_begin,
1663 .write_end = blkdev_write_end,
1664 .writepages = generic_writepages,
1665 .releasepage = blkdev_releasepage,
1666 .direct_IO = blkdev_direct_IO,
1667 .is_dirty_writeback = buffer_check_dirty_writeback,
1668};
1669
1670const struct file_operations def_blk_fops = {
1671 .open = blkdev_open,
1672 .release = blkdev_close,
1673 .llseek = block_llseek,
1674 .read_iter = blkdev_read_iter,
1675 .write_iter = blkdev_write_iter,
1676 .mmap = generic_file_mmap,
1677 .fsync = blkdev_fsync,
1678 .unlocked_ioctl = block_ioctl,
1679#ifdef CONFIG_COMPAT
1680 .compat_ioctl = compat_blkdev_ioctl,
1681#endif
1682 .splice_read = generic_file_splice_read,
1683 .splice_write = iter_file_splice_write,
1684};
1685
1686int ioctl_by_bdev(struct block_device *bdev, unsigned cmd, unsigned long arg)
1687{
1688 int res;
1689 mm_segment_t old_fs = get_fs();
1690 set_fs(KERNEL_DS);
1691 res = blkdev_ioctl(bdev, 0, cmd, arg);
1692 set_fs(old_fs);
1693 return res;
1694}
1695
1696EXPORT_SYMBOL(ioctl_by_bdev);
1697
1698
1699
1700
1701
1702
1703
1704
1705
1706struct block_device *lookup_bdev(const char *pathname)
1707{
1708 struct block_device *bdev;
1709 struct inode *inode;
1710 struct path path;
1711 int error;
1712
1713 if (!pathname || !*pathname)
1714 return ERR_PTR(-EINVAL);
1715
1716 error = kern_path(pathname, LOOKUP_FOLLOW, &path);
1717 if (error)
1718 return ERR_PTR(error);
1719
1720 inode = d_backing_inode(path.dentry);
1721 error = -ENOTBLK;
1722 if (!S_ISBLK(inode->i_mode))
1723 goto fail;
1724 error = -EACCES;
1725 if (path.mnt->mnt_flags & MNT_NODEV)
1726 goto fail;
1727 error = -ENOMEM;
1728 bdev = bd_acquire(inode);
1729 if (!bdev)
1730 goto fail;
1731out:
1732 path_put(&path);
1733 return bdev;
1734fail:
1735 bdev = ERR_PTR(error);
1736 goto out;
1737}
1738EXPORT_SYMBOL(lookup_bdev);
1739
1740int __invalidate_device(struct block_device *bdev, bool kill_dirty)
1741{
1742 struct super_block *sb = get_super(bdev);
1743 int res = 0;
1744
1745 if (sb) {
1746
1747
1748
1749
1750
1751
1752 shrink_dcache_sb(sb);
1753 res = invalidate_inodes(sb, kill_dirty);
1754 drop_super(sb);
1755 }
1756 invalidate_bdev(bdev);
1757 return res;
1758}
1759EXPORT_SYMBOL(__invalidate_device);
1760
1761void iterate_bdevs(void (*func)(struct block_device *, void *), void *arg)
1762{
1763 struct inode *inode, *old_inode = NULL;
1764
1765 spin_lock(&inode_sb_list_lock);
1766 list_for_each_entry(inode, &blockdev_superblock->s_inodes, i_sb_list) {
1767 struct address_space *mapping = inode->i_mapping;
1768
1769 spin_lock(&inode->i_lock);
1770 if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW) ||
1771 mapping->nrpages == 0) {
1772 spin_unlock(&inode->i_lock);
1773 continue;
1774 }
1775 __iget(inode);
1776 spin_unlock(&inode->i_lock);
1777 spin_unlock(&inode_sb_list_lock);
1778
1779
1780
1781
1782
1783
1784
1785
1786 iput(old_inode);
1787 old_inode = inode;
1788
1789 func(I_BDEV(inode), arg);
1790
1791 spin_lock(&inode_sb_list_lock);
1792 }
1793 spin_unlock(&inode_sb_list_lock);
1794 iput(old_inode);
1795}
1796