/*
 *  linux/fs/block_dev.c
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 *  Copyright (C) 2001  Andrea Arcangeli <andrea@suse.de> SuSE
 */

#include <linux/init.h>
#include <linux/mm.h>
#include <linux/fcntl.h>
#include <linux/slab.h>
#include <linux/kmod.h>
#include <linux/major.h>
#include <linux/device_cgroup.h>
#include <linux/highmem.h>
#include <linux/blkdev.h>
#include <linux/module.h>
#include <linux/blkpg.h>
#include <linux/magic.h>
#include <linux/buffer_head.h>
#include <linux/swap.h>
#include <linux/pagevec.h>
#include <linux/writeback.h>
#include <linux/mpage.h>
#include <linux/mount.h>
#include <linux/uio.h>
#include <linux/namei.h>
#include <linux/log2.h>
#include <linux/cleancache.h>
#include <linux/aio.h>
#include <asm/uaccess.h>
#include "internal.h"

struct bdev_inode {
	struct block_device bdev;
	struct inode vfs_inode;
};

static const struct address_space_operations def_blk_aops;

static inline struct bdev_inode *BDEV_I(struct inode *inode)
{
	return container_of(inode, struct bdev_inode, vfs_inode);
}

inline struct block_device *I_BDEV(struct inode *inode)
{
	return &BDEV_I(inode)->bdev;
}
EXPORT_SYMBOL(I_BDEV);

static void bdev_write_inode(struct inode *inode)
{
	spin_lock(&inode->i_lock);
	while (inode->i_state & I_DIRTY) {
		spin_unlock(&inode->i_lock);
		WARN_ON_ONCE(write_inode_now(inode, true));
		spin_lock(&inode->i_lock);
	}
	spin_unlock(&inode->i_lock);
}

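/* Kill _all_ buffers and pagecache, dirty or not. */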
void kill_bdev(struct block_device *bdev)
{
	struct address_space *mapping = bdev->bd_inode->i_mapping;

	if (mapping->nrpages == 0 && mapping->nrshadows == 0)
		return;

	invalidate_bh_lrus();
	truncate_inode_pages(mapping, 0);
}
EXPORT_SYMBOL(kill_bdev);

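/* Invalidate clean unused buffers and pagecache. */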
void invalidate_bdev(struct block_device *bdev)
{
	struct address_space *mapping = bdev->bd_inode->i_mapping;

	if (mapping->nrpages == 0)
		return;

	invalidate_bh_lrus();
	lru_add_drain_all();	/* make sure all lru add caches are flushed */
	invalidate_mapping_pages(mapping, 0, -1);
	/* 99% of the time, we don't need to flush the cleancache on the bdev.
	 * But, for the strange corners, lets be cautious
	 */
	cleancache_invalidate_inode(mapping);
}
EXPORT_SYMBOL(invalidate_bdev);

int set_blocksize(struct block_device *bdev, int size)
{
	/* Size must be a power of two, and between 512 and PAGE_SIZE */
	if (size > PAGE_SIZE || size < 512 || !is_power_of_2(size))
		return -EINVAL;

	/* Size cannot be smaller than the size supported by the device */
	if (size < bdev_logical_block_size(bdev))
		return -EINVAL;

	/* Don't change the size if it is same as current */
	if (bdev->bd_block_size != size) {
		sync_blockdev(bdev);
		bdev->bd_block_size = size;
		bdev->bd_inode->i_blkbits = blksize_bits(size);
		kill_bdev(bdev);
	}
	return 0;
}

EXPORT_SYMBOL(set_blocksize);

int sb_set_blocksize(struct super_block *sb, int size)
{
	if (set_blocksize(sb->s_bdev, size))
		return 0;
	/* If we get here, we know size is power of two
	 * and it's value is between 512 and PAGE_SIZE */
	sb->s_blocksize = size;
	sb->s_blocksize_bits = blksize_bits(size);
	return sb->s_blocksize;
}

EXPORT_SYMBOL(sb_set_blocksize);

int sb_min_blocksize(struct super_block *sb, int size)
{
	int minsize = bdev_logical_block_size(sb->s_bdev);
	if (size < minsize)
		size = minsize;
	return sb_set_blocksize(sb, size);
}

EXPORT_SYMBOL(sb_min_blocksize);

static int
blkdev_get_block(struct inode *inode, sector_t iblock,
		struct buffer_head *bh, int create)
{
	bh->b_bdev = I_BDEV(inode);
	bh->b_blocknr = iblock;
	set_buffer_mapped(bh);
	return 0;
}

static ssize_t
blkdev_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter,
			loff_t offset)
{
	struct file *file = iocb->ki_filp;
	struct inode *inode = file->f_mapping->host;

	return __blockdev_direct_IO(rw, iocb, inode, I_BDEV(inode), iter,
				    offset, blkdev_get_block,
				    NULL, NULL, 0);
}

int __sync_blockdev(struct block_device *bdev, int wait)
{
	if (!bdev)
		return 0;
	if (!wait)
		return filemap_flush(bdev->bd_inode->i_mapping);
	return filemap_write_and_wait(bdev->bd_inode->i_mapping);
}

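/*
 * Write out and wait upon all the dirty data associated with a block
 * device via its mapping.  Does not take the superblock lock.
 */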
int sync_blockdev(struct block_device *bdev)
{
	return __sync_blockdev(bdev, 1);
}
EXPORT_SYMBOL(sync_blockdev);

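/*
 * Write out and wait upon all dirty data associated with this
 * device.  Filesystem data as well as the underlying block
 * device.  Takes the superblock lock.
 */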
int fsync_bdev(struct block_device *bdev)
{
	struct super_block *sb = get_super(bdev);
	if (sb) {
		int res = sync_filesystem(sb);
		drop_super(sb);
		return res;
	}
	return sync_blockdev(bdev);
}
EXPORT_SYMBOL(fsync_bdev);

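/**
 * freeze_bdev  --  lock a filesystem and force it into a consistent state
 * @bdev:	blockdevice to lock
 *
 * If a superblock is found on this device, we take the s_umount semaphore
 * on it to make sure nobody unmounts until the snapshot creation is done.
 * The reference counter (bd_fsfreeze_count) guarantees that only the last
 * unfreeze process can unfreeze the frozen filesystem actually when multiple
 * freeze requests arrive simultaneously. It counts up in freeze_bdev() and
 * count down in thaw_bdev(). When it becomes 0, thaw_bdev() will unfreeze
 * actually.
 */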
struct super_block *freeze_bdev(struct block_device *bdev)
{
	struct super_block *sb;
	int error = 0;

	mutex_lock(&bdev->bd_fsfreeze_mutex);
	if (++bdev->bd_fsfreeze_count > 1) {
		/*
		 * We don't even need to grab a reference - the first call
		 * to freeze_bdev grab an active reference and only the last
		 * thaw_bdev drops it.
		 */
		sb = get_super(bdev);
		drop_super(sb);
		mutex_unlock(&bdev->bd_fsfreeze_mutex);
		return sb;
	}

	sb = get_active_super(bdev);
	if (!sb)
		goto out;
	if (sb->s_op->freeze_super)
		error = sb->s_op->freeze_super(sb);
	else
		error = freeze_super(sb);
	if (error) {
		deactivate_super(sb);
		bdev->bd_fsfreeze_count--;
		mutex_unlock(&bdev->bd_fsfreeze_mutex);
		return ERR_PTR(error);
	}
	deactivate_super(sb);
 out:
	sync_blockdev(bdev);
	mutex_unlock(&bdev->bd_fsfreeze_mutex);
	return sb;	/* thaw_bdev releases s->s_umount */
}
EXPORT_SYMBOL(freeze_bdev);

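/**
 * thaw_bdev  -- unlock filesystem
 * @bdev:	blockdevice to unlock
 * @sb:		associated superblock
 *
 * Unlocks the filesystem and marks it writeable again after freeze_bdev().
 */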
int thaw_bdev(struct block_device *bdev, struct super_block *sb)
{
	int error = -EINVAL;

	mutex_lock(&bdev->bd_fsfreeze_mutex);
	if (!bdev->bd_fsfreeze_count)
		goto out;

	error = 0;
	if (--bdev->bd_fsfreeze_count > 0)
		goto out;

	if (!sb)
		goto out;

	if (sb->s_op->thaw_super)
		error = sb->s_op->thaw_super(sb);
	else
		error = thaw_super(sb);
	if (error) {
		bdev->bd_fsfreeze_count++;
		mutex_unlock(&bdev->bd_fsfreeze_mutex);
		return error;
	}
out:
	mutex_unlock(&bdev->bd_fsfreeze_mutex);
	return 0;
}
EXPORT_SYMBOL(thaw_bdev);

static int blkdev_writepage(struct page *page, struct writeback_control *wbc)
{
	return block_write_full_page(page, blkdev_get_block, wbc);
}

static int blkdev_readpage(struct file * file, struct page * page)
{
	return block_read_full_page(page, blkdev_get_block);
}

static int blkdev_readpages(struct file *file, struct address_space *mapping,
			struct list_head *pages, unsigned nr_pages)
{
	return mpage_readpages(mapping, pages, nr_pages, blkdev_get_block);
}

static int blkdev_write_begin(struct file *file, struct address_space *mapping,
			loff_t pos, unsigned len, unsigned flags,
			struct page **pagep, void **fsdata)
{
	return block_write_begin(mapping, pos, len, flags, pagep,
				 blkdev_get_block);
}

static int blkdev_write_end(struct file *file, struct address_space *mapping,
			loff_t pos, unsigned len, unsigned copied,
			struct page *page, void *fsdata)
{
	int ret;
	ret = block_write_end(file, mapping, pos, len, copied, page, fsdata);

	unlock_page(page);
	page_cache_release(page);

	return ret;
}

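/*
 * For a block special file, file_inode(file)->i_size is zero: the real
 * size lives in the bdev inode (file->f_mapping->host), so seek against
 * that instead.
 */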
static loff_t block_llseek(struct file *file, loff_t offset, int whence)
{
	struct inode *bd_inode = file->f_mapping->host;
	loff_t retval;

	mutex_lock(&bd_inode->i_mutex);
	retval = fixed_size_llseek(file, offset, whence, i_size_read(bd_inode));
	mutex_unlock(&bd_inode->i_mutex);
	return retval;
}

int blkdev_fsync(struct file *filp, loff_t start, loff_t end, int datasync)
{
	struct inode *bd_inode = filp->f_mapping->host;
	struct block_device *bdev = I_BDEV(bd_inode);
	int error;

	error = filemap_write_and_wait_range(filp->f_mapping, start, end);
	if (error)
		return error;

	/*
	 * There is no need to serialise calls to blkdev_issue_flush with
	 * i_mutex and doing so causes performance issues with concurrent
	 * O_SYNC writers to a block device.
	 */
	error = blkdev_issue_flush(bdev, GFP_KERNEL, NULL);
	if (error == -EOPNOTSUPP)
		error = 0;

	return error;
}
EXPORT_SYMBOL(blkdev_fsync);

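/**
 * bdev_read_page() - Start reading a page from a block device
 * @bdev: The device to read the page from
 * @sector: The offset on the device to read the page to (need not be aligned)
 * @page: The page to read
 *
 * On entry, the page should be locked.  It will be unlocked when the page
 * has been read.  If the block driver implements rw_page, it is used to
 * read the page; otherwise -EOPNOTSUPP is returned so the caller can fall
 * back to submitting a bio.
 *
 * Return: negative errno if an error occurs, 0 if submission was successful.
 */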
int bdev_read_page(struct block_device *bdev, sector_t sector,
			struct page *page)
{
	const struct block_device_operations *ops = bdev->bd_disk->fops;
	if (!ops->rw_page)
		return -EOPNOTSUPP;
	return ops->rw_page(bdev, sector + get_start_sect(bdev), page, READ);
}
EXPORT_SYMBOL_GPL(bdev_read_page);

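/**
 * bdev_write_page() - Start writing a page to a block device
 * @bdev: The device to write the page to
 * @sector: The offset on the device to write the page to (need not be aligned)
 * @page: The page to write
 * @wbc: The writeback_control for the write
 *
 * On entry, the page should be locked and not currently under writeback.
 * On exit, if the write started successfully, the page will be unlocked and
 * under writeback.  If the write failed already (eg the driver failed to
 * queue the page to the device), the page will still be locked.  If the
 * caller is a ->writepage implementation, it will need to unlock the page.
 *
 * Return: negative errno if an error occurs, 0 if submission was successful.
 */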
int bdev_write_page(struct block_device *bdev, sector_t sector,
			struct page *page, struct writeback_control *wbc)
{
	int result;
	int rw = (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : WRITE;
	const struct block_device_operations *ops = bdev->bd_disk->fops;
	if (!ops->rw_page)
		return -EOPNOTSUPP;
	set_page_writeback(page);
	result = ops->rw_page(bdev, sector + get_start_sect(bdev), page, rw);
	if (result)
		end_page_writeback(page);
	else
		unlock_page(page);
	return result;
}
EXPORT_SYMBOL_GPL(bdev_write_page);

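/**
 * bdev_direct_access() - Get the address for directly-accessibly memory
 * @bdev: The device containing the memory
 * @sector: The offset within the device
 * @addr: Where to put the address of the memory
 * @pfn: The Page Frame Number for the memory
 * @size: The number of bytes requested
 *
 * If a block device is made up of directly addressable memory, this function
 * will tell the caller the PFN and the address of the memory.  The address
 * may be directly dereferenced within the kernel without the need to call
 * ioremap(), kmap() or similar.  The PFN is suitable for inserting into
 * page tables.
 *
 * Return: negative errno if an error occurs, otherwise the number of bytes
 * accessible at this address.
 */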
long bdev_direct_access(struct block_device *bdev, sector_t sector,
			void **addr, unsigned long *pfn, long size)
{
	long avail;
	const struct block_device_operations *ops = bdev->bd_disk->fops;

	if (size < 0)
		return size;
	if (!ops->direct_access)
		return -EOPNOTSUPP;
	if ((sector + DIV_ROUND_UP(size, 512)) >
					part_nr_sects_read(bdev->bd_part))
		return -ERANGE;
	sector += get_start_sect(bdev);
	if (sector % (PAGE_SIZE / 512))
		return -EINVAL;
	avail = ops->direct_access(bdev, sector, addr, pfn, size);
	if (!avail)
		return -ERANGE;
	return min(avail, size);
}
EXPORT_SYMBOL_GPL(bdev_direct_access);

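/*
 * pseudo-fs
 */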
static __cacheline_aligned_in_smp DEFINE_SPINLOCK(bdev_lock);
static struct kmem_cache * bdev_cachep __read_mostly;

static struct inode *bdev_alloc_inode(struct super_block *sb)
{
	struct bdev_inode *ei = kmem_cache_alloc(bdev_cachep, GFP_KERNEL);
	if (!ei)
		return NULL;
	return &ei->vfs_inode;
}

static void bdev_i_callback(struct rcu_head *head)
{
	struct inode *inode = container_of(head, struct inode, i_rcu);
	struct bdev_inode *bdi = BDEV_I(inode);

	kmem_cache_free(bdev_cachep, bdi);
}

static void bdev_destroy_inode(struct inode *inode)
{
	call_rcu(&inode->i_rcu, bdev_i_callback);
}

static void init_once(void *foo)
{
	struct bdev_inode *ei = (struct bdev_inode *) foo;
	struct block_device *bdev = &ei->bdev;

	memset(bdev, 0, sizeof(*bdev));
	mutex_init(&bdev->bd_mutex);
	INIT_LIST_HEAD(&bdev->bd_inodes);
	INIT_LIST_HEAD(&bdev->bd_list);
#ifdef CONFIG_SYSFS
	INIT_LIST_HEAD(&bdev->bd_holder_disks);
#endif
	inode_init_once(&ei->vfs_inode);
	/* Initialize mutex for freeze. */
	mutex_init(&bdev->bd_fsfreeze_mutex);
}

static inline void __bd_forget(struct inode *inode)
{
	list_del_init(&inode->i_devices);
	inode->i_bdev = NULL;
	inode->i_mapping = &inode->i_data;
}

static void bdev_evict_inode(struct inode *inode)
{
	struct block_device *bdev = &BDEV_I(inode)->bdev;
	struct list_head *p;
	truncate_inode_pages_final(&inode->i_data);
	invalidate_inode_buffers(inode); /* is it needed here? */
	clear_inode(inode);
	spin_lock(&bdev_lock);
	while ( (p = bdev->bd_inodes.next) != &bdev->bd_inodes ) {
		__bd_forget(list_entry(p, struct inode, i_devices));
	}
	list_del_init(&bdev->bd_list);
	spin_unlock(&bdev_lock);
}

static const struct super_operations bdev_sops = {
	.statfs = simple_statfs,
	.alloc_inode = bdev_alloc_inode,
	.destroy_inode = bdev_destroy_inode,
	.drop_inode = generic_delete_inode,
	.evict_inode = bdev_evict_inode,
};

static struct dentry *bd_mount(struct file_system_type *fs_type,
	int flags, const char *dev_name, void *data)
{
	return mount_pseudo(fs_type, "bdev:", &bdev_sops, NULL, BDEVFS_MAGIC);
}

static struct file_system_type bd_type = {
	.name		= "bdev",
	.mount		= bd_mount,
	.kill_sb	= kill_anon_super,
};

static struct super_block *blockdev_superblock __read_mostly;

void __init bdev_cache_init(void)
{
	int err;
	static struct vfsmount *bd_mnt;

	bdev_cachep = kmem_cache_create("bdev_cache", sizeof(struct bdev_inode),
			0, (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|
				SLAB_MEM_SPREAD|SLAB_PANIC),
			init_once);
	err = register_filesystem(&bd_type);
	if (err)
		panic("Cannot register bdev pseudo-fs");
	bd_mnt = kern_mount(&bd_type);
	if (IS_ERR(bd_mnt))
		panic("Cannot create bdev pseudo-fs");
	blockdev_superblock = bd_mnt->mnt_sb;	/* For writeback */
}

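/*
 * Most likely _very_ bad one - but then it's hardly critical for small
 * /dev and can be fixed when somebody will need really large one.
 * Keep in mind that it will be fed through icache hash function too.
 */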
static inline unsigned long hash(dev_t dev)
{
	return MAJOR(dev)+MINOR(dev);
}

static int bdev_test(struct inode *inode, void *data)
{
	return BDEV_I(inode)->bdev.bd_dev == *(dev_t *)data;
}

static int bdev_set(struct inode *inode, void *data)
{
	BDEV_I(inode)->bdev.bd_dev = *(dev_t *)data;
	return 0;
}

static LIST_HEAD(all_bdevs);

struct block_device *bdget(dev_t dev)
{
	struct block_device *bdev;
	struct inode *inode;

	inode = iget5_locked(blockdev_superblock, hash(dev),
			bdev_test, bdev_set, &dev);

	if (!inode)
		return NULL;

	bdev = &BDEV_I(inode)->bdev;

	if (inode->i_state & I_NEW) {
		bdev->bd_contains = NULL;
		bdev->bd_super = NULL;
		bdev->bd_inode = inode;
		bdev->bd_block_size = (1 << inode->i_blkbits);
		bdev->bd_part_count = 0;
		bdev->bd_invalidated = 0;
		inode->i_mode = S_IFBLK;
		inode->i_rdev = dev;
		inode->i_bdev = bdev;
		inode->i_data.a_ops = &def_blk_aops;
		mapping_set_gfp_mask(&inode->i_data, GFP_USER);
		spin_lock(&bdev_lock);
		list_add(&bdev->bd_list, &all_bdevs);
		spin_unlock(&bdev_lock);
		unlock_new_inode(inode);
	}
	return bdev;
}

EXPORT_SYMBOL(bdget);

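/**
 * bdgrab -- Grab a reference to an already referenced block device
 * @bdev:	Block device to grab a reference to.
 */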
struct block_device *bdgrab(struct block_device *bdev)
{
	ihold(bdev->bd_inode);
	return bdev;
}
EXPORT_SYMBOL(bdgrab);

long nr_blockdev_pages(void)
{
	struct block_device *bdev;
	long ret = 0;
	spin_lock(&bdev_lock);
	list_for_each_entry(bdev, &all_bdevs, bd_list) {
		ret += bdev->bd_inode->i_mapping->nrpages;
	}
	spin_unlock(&bdev_lock);
	return ret;
}

void bdput(struct block_device *bdev)
{
	iput(bdev->bd_inode);
}

EXPORT_SYMBOL(bdput);

static struct block_device *bd_acquire(struct inode *inode)
{
	struct block_device *bdev;

	spin_lock(&bdev_lock);
	bdev = inode->i_bdev;
	if (bdev) {
		ihold(bdev->bd_inode);
		spin_unlock(&bdev_lock);
		return bdev;
	}
	spin_unlock(&bdev_lock);

	bdev = bdget(inode->i_rdev);
	if (bdev) {
		spin_lock(&bdev_lock);
		if (!inode->i_bdev) {
			/*
			 * We take an additional reference to bd_inode,
			 * and it's released in clear_inode() of inode.
			 * So, we can access it via ->i_mapping always
			 * without igrab().
			 */
			ihold(bdev->bd_inode);
			inode->i_bdev = bdev;
			inode->i_mapping = bdev->bd_inode->i_mapping;
			list_add(&inode->i_devices, &bdev->bd_inodes);
		}
		spin_unlock(&bdev_lock);
	}
	return bdev;
}

int sb_is_blkdev_sb(struct super_block *sb)
{
	return sb == blockdev_superblock;
}

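/* Call when you free inode */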
void bd_forget(struct inode *inode)
{
	struct block_device *bdev = NULL;

	spin_lock(&bdev_lock);
	if (!sb_is_blkdev_sb(inode->i_sb))
		bdev = inode->i_bdev;
	__bd_forget(inode);
	spin_unlock(&bdev_lock);

	if (bdev)
		iput(bdev->bd_inode);
}

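/**
 * bd_may_claim - test whether a block device can be claimed
 * @bdev: block device of interest
 * @whole: whole block device containing @bdev, may equal @bdev
 * @holder: holder trying to claim @bdev
 *
 * Test whether @bdev can be claimed by @holder.
 *
 * CONTEXT:
 * spin_lock(&bdev_lock).
 *
 * RETURNS:
 * %true if @bdev can be claimed, %false otherwise.
 */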
static bool bd_may_claim(struct block_device *bdev, struct block_device *whole,
			 void *holder)
{
	if (bdev->bd_holder == holder)
		return true;	 /* already a holder */
	else if (bdev->bd_holder != NULL)
		return false;	 /* held by someone else */
	else if (bdev->bd_contains == bdev)
		return true;	 /* is a whole device which isn't held */

	else if (whole->bd_holder == bd_may_claim)
		return true;	 /* is a partition of a device that is being partitioned */
	else if (whole->bd_holder != NULL)
		return false;	 /* is a partition of a held device */
	else
		return true;	 /* is a partition of an un-held device */
}

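/**
 * bd_prepare_to_claim - prepare to claim a block device
 * @bdev: block device of interest
 * @whole: the whole device containing @bdev, may equal @bdev
 * @holder: holder trying to claim @bdev
 *
 * Prepare to claim @bdev.  This function fails if @bdev is already
 * claimed by another holder and waits if another claiming is in
 * progress.  This function doesn't actually claim.  On successful
 * return, the caller has ownership of bd_claiming and bd_holder[s].
 *
 * CONTEXT:
 * spin_lock(&bdev_lock).  Might release bdev_lock, sleep and regrab
 * it multiple times.
 *
 * RETURNS:
 * 0 if @bdev can be claimed, -EBUSY otherwise.
 */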
static int bd_prepare_to_claim(struct block_device *bdev,
			       struct block_device *whole, void *holder)
{
retry:
	/* if someone else claimed, fail */
	if (!bd_may_claim(bdev, whole, holder))
		return -EBUSY;

	/* if claiming is already in progress, wait for it to finish */
	if (whole->bd_claiming) {
		wait_queue_head_t *wq = bit_waitqueue(&whole->bd_claiming, 0);
		DEFINE_WAIT(wait);

		prepare_to_wait(wq, &wait, TASK_UNINTERRUPTIBLE);
		spin_unlock(&bdev_lock);
		schedule();
		finish_wait(wq, &wait);
		spin_lock(&bdev_lock);
		goto retry;
	}

	/* yay, all mine */
	return 0;
}

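/**
 * bd_start_claiming - start claiming a block device
 * @bdev: block device of interest
 * @holder: holder trying to claim @bdev
 *
 * @bdev is about to be opened exclusively.  Check @bdev can be opened
 * exclusively and mark that an exclusive open is in progress.  Each
 * successful call to this function must be matched with a later release
 * of the claim (see blkdev_get() for how the claim is finished or
 * aborted).
 *
 * CONTEXT:
 * Might sleep.
 *
 * RETURNS:
 * Pointer to the block device containing @bdev on success, ERR_PTR()
 * value on failure.
 */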
static struct block_device *bd_start_claiming(struct block_device *bdev,
					      void *holder)
{
	struct gendisk *disk;
	struct block_device *whole;
	int partno, err;

	might_sleep();

	/*
	 * @bdev might not have been initialized properly yet, look up
	 * and grab the outer block device the hard way.
	 */
	disk = get_gendisk(bdev->bd_dev, &partno);
	if (!disk)
		return ERR_PTR(-ENXIO);

	/*
	 * Normally, @bdev should equal what's returned from bdget_disk()
	 * if partno is 0; however, some drivers (floppy) use multiple
	 * bdev's for the same physical device and @bdev may be one of the
	 * aliases.  Keep @bdev if partno is 0.  This means claimer
	 * tracking is broken for those devices but it has always been that
	 * way.
	 */
	if (partno)
		whole = bdget_disk(disk, 0);
	else
		whole = bdgrab(bdev);

	module_put(disk->fops->owner);
	put_disk(disk);
	if (!whole)
		return ERR_PTR(-ENOMEM);

	/* prepare to claim, if successful, mark claiming in progress */
	spin_lock(&bdev_lock);

	err = bd_prepare_to_claim(bdev, whole, holder);
	if (err == 0) {
		whole->bd_claiming = holder;
		spin_unlock(&bdev_lock);
		return whole;
	} else {
		spin_unlock(&bdev_lock);
		bdput(whole);
		return ERR_PTR(err);
	}
}

#ifdef CONFIG_SYSFS
struct bd_holder_disk {
	struct list_head	list;
	struct gendisk		*disk;
	int			refcnt;
};

static struct bd_holder_disk *bd_find_holder_disk(struct block_device *bdev,
						  struct gendisk *disk)
{
	struct bd_holder_disk *holder;

	list_for_each_entry(holder, &bdev->bd_holder_disks, list)
		if (holder->disk == disk)
			return holder;
	return NULL;
}

static int add_symlink(struct kobject *from, struct kobject *to)
{
	return sysfs_create_link(from, to, kobject_name(to));
}

static void del_symlink(struct kobject *from, struct kobject *to)
{
	sysfs_remove_link(from, kobject_name(to));
}

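/**
 * bd_link_disk_holder - create symlinks between holding disk and slave bdev
 * @bdev: the claimed slave bdev
 * @disk: the holding disk
 *
 * DON'T USE THIS UNLESS YOU'RE ALREADY USING IT.
 *
 * This functions creates the following sysfs symlinks.
 *
 * - from "slaves" directory of the holder @disk to the claimed @bdev
 * - from "holders" directory of the @bdev to the holder @disk
 *
 * For example, if /dev/dm-0 maps to /dev/sda and disk for dm-0 is
 * passed to bd_link_disk_holder(), then:
 *
 *   /sys/block/dm-0/slaves/sda --> /sys/block/sda
 *   /sys/block/sda/holders/dm-0 --> /sys/block/dm-0
 *
 * The caller must have claimed @bdev before calling this function and
 * ensure that both @bdev and @disk are valid during the creation and
 * lifetime of these symlinks.
 *
 * CONTEXT:
 * Might sleep.
 *
 * RETURNS:
 * 0 on success, -errno on failure.
 */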
int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk)
{
	struct bd_holder_disk *holder;
	int ret = 0;

	mutex_lock(&bdev->bd_mutex);

	WARN_ON_ONCE(!bdev->bd_holder);

	/* FIXME: remove the following once add_disk() handles errors */
	if (WARN_ON(!disk->slave_dir || !bdev->bd_part->holder_dir))
		goto out_unlock;

	holder = bd_find_holder_disk(bdev, disk);
	if (holder) {
		holder->refcnt++;
		goto out_unlock;
	}

	holder = kzalloc(sizeof(*holder), GFP_KERNEL);
	if (!holder) {
		ret = -ENOMEM;
		goto out_unlock;
	}

	INIT_LIST_HEAD(&holder->list);
	holder->disk = disk;
	holder->refcnt = 1;

	ret = add_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj);
	if (ret)
		goto out_free;

	ret = add_symlink(bdev->bd_part->holder_dir, &disk_to_dev(disk)->kobj);
	if (ret)
		goto out_del;
	/*
	 * bdev could be deleted beneath us which would implicitly destroy
	 * the holder directory.  Hold on to it.
	 */
	kobject_get(bdev->bd_part->holder_dir);

	list_add(&holder->list, &bdev->bd_holder_disks);
	goto out_unlock;

out_del:
	del_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj);
out_free:
	kfree(holder);
out_unlock:
	mutex_unlock(&bdev->bd_mutex);
	return ret;
}
EXPORT_SYMBOL_GPL(bd_link_disk_holder);

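/**
 * bd_unlink_disk_holder - destroy symlinks created by bd_link_disk_holder()
 * @bdev: the claimed slave bdev
 * @disk: the holding disk
 *
 * DON'T USE THIS UNLESS YOU'RE ALREADY USING IT.
 *
 * CONTEXT:
 * Might sleep.
 */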
void bd_unlink_disk_holder(struct block_device *bdev, struct gendisk *disk)
{
	struct bd_holder_disk *holder;

	mutex_lock(&bdev->bd_mutex);

	holder = bd_find_holder_disk(bdev, disk);

	if (!WARN_ON_ONCE(holder == NULL) && !--holder->refcnt) {
		del_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj);
		del_symlink(bdev->bd_part->holder_dir,
			    &disk_to_dev(disk)->kobj);
		kobject_put(bdev->bd_part->holder_dir);
		list_del_init(&holder->list);
		kfree(holder);
	}

	mutex_unlock(&bdev->bd_mutex);
}
EXPORT_SYMBOL_GPL(bd_unlink_disk_holder);
#endif

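/**
 * flush_disk - invalidates all buffer-cache entries on a disk
 *
 * @bdev:      struct block device to be flushed
 * @kill_dirty: flag to guide handling of dirty inodes
 *
 * Invalidates all buffer-cache entries on a disk. It should be called
 * when a disk has been changed -- either by a media change or online
 * resize.
 */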
static void flush_disk(struct block_device *bdev, bool kill_dirty)
{
	if (__invalidate_device(bdev, kill_dirty)) {
		char name[BDEVNAME_SIZE] = "";

		if (bdev->bd_disk)
			disk_name(bdev->bd_disk, 0, name);
		printk(KERN_WARNING "VFS: busy inodes on changed media or "
		       "resized disk %s\n", name);
	}

	if (!bdev->bd_disk)
		return;
	if (disk_part_scan_enabled(bdev->bd_disk))
		bdev->bd_invalidated = 1;
}

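/**
 * check_disk_size_change - checks for disk size change and adjusts bdev size.
 * @disk: struct gendisk to check
 * @bdev: struct bdev to adjust.
 *
 * This routine checks to see if the bdev size does not match the disk size
 * and adjusts it if it differs.
 */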
void check_disk_size_change(struct gendisk *disk, struct block_device *bdev)
{
	loff_t disk_size, bdev_size;

	disk_size = (loff_t)get_capacity(disk) << 9;
	bdev_size = i_size_read(bdev->bd_inode);
	if (disk_size != bdev_size) {
		char name[BDEVNAME_SIZE];

		disk_name(disk, 0, name);
		printk(KERN_INFO
		       "%s: detected capacity change from %lld to %lld\n",
		       name, bdev_size, disk_size);
		i_size_write(bdev->bd_inode, disk_size);
		flush_disk(bdev, false);
	}
}
EXPORT_SYMBOL(check_disk_size_change);

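/**
 * revalidate_disk - wrapper for lower-level driver's revalidate_disk call-back
 * @disk: struct gendisk to be revalidated
 *
 * This routine is a wrapper for lower-level driver's revalidate_disk
 * call-backs.  It is used to do common pre and post operations needed
 * for all revalidate_disk operations.
 */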
int revalidate_disk(struct gendisk *disk)
{
	struct block_device *bdev;
	int ret = 0;

	if (disk->fops->revalidate_disk)
		ret = disk->fops->revalidate_disk(disk);

	bdev = bdget_disk(disk, 0);
	if (!bdev)
		return ret;

	mutex_lock(&bdev->bd_mutex);
	check_disk_size_change(disk, bdev);
	bdev->bd_invalidated = 0;
	mutex_unlock(&bdev->bd_mutex);
	bdput(bdev);
	return ret;
}
EXPORT_SYMBOL(revalidate_disk);

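/*
 * This routine checks whether a removable media has been changed,
 * and invalidates all buffer-cache-entries in that case. This
 * is a relatively slow routine, so we have to try to minimize using
 * it. Thus it is called only upon a 'mount' or 'open'. This
 * is the best way of combining speed and utility, I think.
 * People changing diskettes in the middle of an operation deserve
 * to lose :-)
 */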
int check_disk_change(struct block_device *bdev)
{
	struct gendisk *disk = bdev->bd_disk;
	const struct block_device_operations *bdops = disk->fops;
	unsigned int events;

	events = disk_clear_events(disk, DISK_EVENT_MEDIA_CHANGE |
				   DISK_EVENT_EJECT_REQUEST);
	if (!(events & DISK_EVENT_MEDIA_CHANGE))
		return 0;

	flush_disk(bdev, true);
	if (bdops->revalidate_disk)
		bdops->revalidate_disk(bdev->bd_disk);
	return 1;
}

EXPORT_SYMBOL(check_disk_change);

void bd_set_size(struct block_device *bdev, loff_t size)
{
	unsigned bsize = bdev_logical_block_size(bdev);

	mutex_lock(&bdev->bd_inode->i_mutex);
	i_size_write(bdev->bd_inode, size);
	mutex_unlock(&bdev->bd_inode->i_mutex);
	/* Pick the largest block size (up to a page) that divides @size */
	while (bsize < PAGE_CACHE_SIZE) {
		if (size & bsize)
			break;
		bsize <<= 1;
	}
	bdev->bd_block_size = bsize;
	bdev->bd_inode->i_blkbits = blksize_bits(bsize);
}
EXPORT_SYMBOL(bd_set_size);

static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part);

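/*
 * bd_mutex locking:
 *
 *  mutex_lock(part->bd_mutex)
 *    mutex_lock_nested(whole->bd_mutex, 1)
 */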
static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
{
	struct gendisk *disk;
	struct module *owner;
	int ret;
	int partno;
	int perm = 0;

	if (mode & FMODE_READ)
		perm |= MAY_READ;
	if (mode & FMODE_WRITE)
		perm |= MAY_WRITE;
	/*
	 * hooks: /n/, see "layering violations".
	 */
	if (!for_part) {
		ret = devcgroup_inode_permission(bdev->bd_inode, perm);
		if (ret != 0) {
			bdput(bdev);
			return ret;
		}
	}

 restart:

	ret = -ENXIO;
	disk = get_gendisk(bdev->bd_dev, &partno);
	if (!disk)
		goto out;
	owner = disk->fops->owner;

	disk_block_events(disk);
	mutex_lock_nested(&bdev->bd_mutex, for_part);
	if (!bdev->bd_openers) {
		bdev->bd_disk = disk;
		bdev->bd_queue = disk->queue;
		bdev->bd_contains = bdev;
		if (!partno) {
			ret = -ENXIO;
			bdev->bd_part = disk_get_part(disk, partno);
			if (!bdev->bd_part)
				goto out_clear;

			ret = 0;
			if (disk->fops->open) {
				ret = disk->fops->open(bdev, mode);
				if (ret == -ERESTARTSYS) {
					/* Lost a race with 'disk' being
					 * deleted, try again.
					 * See md.c
					 */
					disk_put_part(bdev->bd_part);
					bdev->bd_part = NULL;
					bdev->bd_disk = NULL;
					bdev->bd_queue = NULL;
					mutex_unlock(&bdev->bd_mutex);
					disk_unblock_events(disk);
					put_disk(disk);
					module_put(owner);
					goto restart;
				}
			}

			if (!ret)
				bd_set_size(bdev,(loff_t)get_capacity(disk)<<9);

			/*
			 * If the device is invalidated, rescan partition
			 * if open succeeded or failed with -ENOMEDIUM.
			 * The latter is necessary to prevent ghost
			 * partitions on a removed medium.
			 */
			if (bdev->bd_invalidated) {
				if (!ret)
					rescan_partitions(disk, bdev);
				else if (ret == -ENOMEDIUM)
					invalidate_partitions(disk, bdev);
			}
			if (ret)
				goto out_clear;
		} else {
			struct block_device *whole;
			whole = bdget_disk(disk, 0);
			ret = -ENOMEM;
			if (!whole)
				goto out_clear;
			BUG_ON(for_part);
			ret = __blkdev_get(whole, mode, 1);
			if (ret)
				goto out_clear;
			bdev->bd_contains = whole;
			bdev->bd_part = disk_get_part(disk, partno);
			if (!(disk->flags & GENHD_FL_UP) ||
			    !bdev->bd_part || !bdev->bd_part->nr_sects) {
				ret = -ENXIO;
				goto out_clear;
			}
			bd_set_size(bdev, (loff_t)bdev->bd_part->nr_sects << 9);
		}
	} else {
		if (bdev->bd_contains == bdev) {
			ret = 0;
			if (bdev->bd_disk->fops->open)
				ret = bdev->bd_disk->fops->open(bdev, mode);
			/* the same as first opener case, read comment there */
			if (bdev->bd_invalidated) {
				if (!ret)
					rescan_partitions(bdev->bd_disk, bdev);
				else if (ret == -ENOMEDIUM)
					invalidate_partitions(bdev->bd_disk, bdev);
			}
			if (ret)
				goto out_unlock_bdev;
		}
		/* only one opener holds refs to the module and disk */
		put_disk(disk);
		module_put(owner);
	}
	bdev->bd_openers++;
	if (for_part)
		bdev->bd_part_count++;
	mutex_unlock(&bdev->bd_mutex);
	disk_unblock_events(disk);
	return 0;

 out_clear:
	disk_put_part(bdev->bd_part);
	bdev->bd_disk = NULL;
	bdev->bd_part = NULL;
	bdev->bd_queue = NULL;
	if (bdev != bdev->bd_contains)
		__blkdev_put(bdev->bd_contains, mode, 1);
	bdev->bd_contains = NULL;
 out_unlock_bdev:
	mutex_unlock(&bdev->bd_mutex);
	disk_unblock_events(disk);
	put_disk(disk);
	module_put(owner);
 out:
	bdput(bdev);

	return ret;
}

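/**
 * blkdev_get - open a block device
 * @bdev: block_device to open
 * @mode: FMODE_* mask
 * @holder: exclusive holder identifier
 *
 * Open @bdev with @mode.  If @mode includes %FMODE_EXCL, @bdev is
 * open with exclusive access.  Specifying %FMODE_EXCL with %NULL
 * @holder is invalid.  Exclusive opens may nest for the same @holder.
 *
 * On success, the reference count of @bdev is unchanged.  On failure,
 * @bdev is put.
 *
 * CONTEXT:
 * Might sleep.
 *
 * RETURNS:
 * 0 on success, -errno on failure.
 */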
int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder)
{
	struct block_device *whole = NULL;
	int res;

	WARN_ON_ONCE((mode & FMODE_EXCL) && !holder);

	if ((mode & FMODE_EXCL) && holder) {
		whole = bd_start_claiming(bdev, holder);
		if (IS_ERR(whole)) {
			bdput(bdev);
			return PTR_ERR(whole);
		}
	}

	res = __blkdev_get(bdev, mode, 0);

	if (whole) {
		struct gendisk *disk = whole->bd_disk;

		/* finish claiming */
		mutex_lock(&bdev->bd_mutex);
		spin_lock(&bdev_lock);

		if (!res) {
			BUG_ON(!bd_may_claim(bdev, whole, holder));
			/*
			 * Note that for a whole device bd_holders
			 * will be incremented twice, and bd_holder
			 * will be set to bd_may_claim before being
			 * set to holder
			 */
			whole->bd_holders++;
			whole->bd_holder = bd_may_claim;
			bdev->bd_holders++;
			bdev->bd_holder = holder;
		}

		/* tell others that we're done */
		BUG_ON(whole->bd_claiming != holder);
		whole->bd_claiming = NULL;
		wake_up_bit(&whole->bd_claiming, 0);

		spin_unlock(&bdev_lock);

		/*
		 * Block event polling for write claims if requested.  Any
		 * write holder makes the write_holder state stick until
		 * all are released.  This is good enough and tracking
		 * individual writeable reference is too fragile given the
		 * way @mode is used in blkdev_get/put().
		 */
		if (!res && (mode & FMODE_WRITE) && !bdev->bd_write_holder &&
		    (disk->flags & GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE)) {
			bdev->bd_write_holder = true;
			disk_block_events(disk);
		}

		mutex_unlock(&bdev->bd_mutex);
		bdput(whole);
	}

	return res;
}
EXPORT_SYMBOL(blkdev_get);

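/**
 * blkdev_get_by_path - open a block device by name
 * @path: path to the block device to open
 * @mode: FMODE_* mask
 * @holder: exclusive holder identifier
 *
 * Open the blockdevice described by the device file at @path.  @mode
 * and @holder are identical to blkdev_get().
 *
 * On success, the returned block_device has reference count of one.
 *
 * CONTEXT:
 * Might sleep.
 *
 * RETURNS:
 * Pointer to block_device on success, ERR_PTR(-errno) on failure.
 */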
struct block_device *blkdev_get_by_path(const char *path, fmode_t mode,
					void *holder)
{
	struct block_device *bdev;
	int err;

	bdev = lookup_bdev(path);
	if (IS_ERR(bdev))
		return bdev;

	err = blkdev_get(bdev, mode, holder);
	if (err)
		return ERR_PTR(err);

	if ((mode & FMODE_WRITE) && bdev_read_only(bdev)) {
		blkdev_put(bdev, mode);
		return ERR_PTR(-EACCES);
	}

	return bdev;
}
EXPORT_SYMBOL(blkdev_get_by_path);

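/**
 * blkdev_get_by_dev - open a block device by device number
 * @dev: device number of block device to open
 * @mode: FMODE_* mask
 * @holder: exclusive holder identifier
 *
 * Open the blockdevice described by device number @dev.  @mode and
 * @holder are identical to blkdev_get().
 *
 * Use it ONLY if you really do not have anything better - i.e. when
 * you are behind a truly sucky interface and all you are given is a
 * device number.  _Never_ to be used for internal purposes.  If you
 * ever need it - reconsider your API.
 *
 * On success, the returned block_device has reference count of one.
 *
 * CONTEXT:
 * Might sleep.
 *
 * RETURNS:
 * Pointer to block_device on success, ERR_PTR(-errno) on failure.
 */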
struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder)
{
	struct block_device *bdev;
	int err;

	bdev = bdget(dev);
	if (!bdev)
		return ERR_PTR(-ENOMEM);

	err = blkdev_get(bdev, mode, holder);
	if (err)
		return ERR_PTR(err);

	return bdev;
}
EXPORT_SYMBOL(blkdev_get_by_dev);

static int blkdev_open(struct inode * inode, struct file * filp)
{
	struct block_device *bdev;

	/*
	 * Preserve backwards compatibility and allow large file access
	 * even if userspace doesn't ask for it explicitly. Some mkfs
	 * binary needs it. We might want to drop this workaround
	 * during an unstable branch.
	 */
	filp->f_flags |= O_LARGEFILE;

	if (filp->f_flags & O_NDELAY)
		filp->f_mode |= FMODE_NDELAY;
	if (filp->f_flags & O_EXCL)
		filp->f_mode |= FMODE_EXCL;
	if ((filp->f_flags & O_ACCMODE) == 3)
		filp->f_mode |= FMODE_WRITE_IOCTL;

	bdev = bd_acquire(inode);
	if (bdev == NULL)
		return -ENOMEM;

	filp->f_mapping = bdev->bd_inode->i_mapping;

	return blkdev_get(bdev, filp->f_mode, filp);
}

static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part)
{
	struct gendisk *disk = bdev->bd_disk;
	struct block_device *victim = NULL;

	mutex_lock_nested(&bdev->bd_mutex, for_part);
	if (for_part)
		bdev->bd_part_count--;

	if (!--bdev->bd_openers) {
		WARN_ON_ONCE(bdev->bd_holders);
		sync_blockdev(bdev);
		kill_bdev(bdev);
		/*
		 * ->release can cause the queue to disappear, so flush all
		 * dirty data before.
		 */
		bdev_write_inode(bdev->bd_inode);
	}
	if (bdev->bd_contains == bdev) {
		if (disk->fops->release)
			disk->fops->release(disk, mode);
	}
	if (!bdev->bd_openers) {
		struct module *owner = disk->fops->owner;

		disk_put_part(bdev->bd_part);
		bdev->bd_part = NULL;
		bdev->bd_disk = NULL;
		if (bdev != bdev->bd_contains)
			victim = bdev->bd_contains;
		bdev->bd_contains = NULL;

		put_disk(disk);
		module_put(owner);
	}
	mutex_unlock(&bdev->bd_mutex);
	bdput(bdev);
	if (victim)
		__blkdev_put(victim, mode, 1);
}

void blkdev_put(struct block_device *bdev, fmode_t mode)
{
	mutex_lock(&bdev->bd_mutex);

	if (mode & FMODE_EXCL) {
		bool bdev_free;

		/*
		 * Release a claim on the device.  The holder fields
		 * are protected with bdev_lock.  bd_mutex is to
		 * synchronize disk_holder unlinking.
		 */
		spin_lock(&bdev_lock);

		WARN_ON_ONCE(--bdev->bd_holders < 0);
		WARN_ON_ONCE(--bdev->bd_contains->bd_holders < 0);

		/* bd_contains might point back to itself */
		if ((bdev_free = !bdev->bd_holders))
			bdev->bd_holder = NULL;
		if (!bdev->bd_contains->bd_holders)
			bdev->bd_contains->bd_holder = NULL;

		spin_unlock(&bdev_lock);

		/*
		 * If this was the last claim, remove holder link and
		 * unblock evpoll if it was a write holder.
		 */
		if (bdev_free && bdev->bd_write_holder) {
			disk_unblock_events(bdev->bd_disk);
			bdev->bd_write_holder = false;
		}
	}

	/*
	 * Trigger event checking and tell drivers to flush MEDIA_CHANGE
	 * event.  This is to ensure detection of media removal commanded
	 * from userland - e.g. eject(1).
	 */
	disk_flush_events(bdev->bd_disk, DISK_EVENT_MEDIA_CHANGE);

	mutex_unlock(&bdev->bd_mutex);

	__blkdev_put(bdev, mode, 0);
}
EXPORT_SYMBOL(blkdev_put);

static int blkdev_close(struct inode * inode, struct file * filp)
{
	struct block_device *bdev = I_BDEV(filp->f_mapping->host);
	blkdev_put(bdev, filp->f_mode);
	return 0;
}

static long block_ioctl(struct file *file, unsigned cmd, unsigned long arg)
{
	struct block_device *bdev = I_BDEV(file->f_mapping->host);
	fmode_t mode = file->f_mode;

	/*
	 * O_NDELAY can be altered using fcntl(.., F_SETFL, ..), so we have
	 * to update it before every ioctl.
	 */
	if (file->f_flags & O_NDELAY)
		mode |= FMODE_NDELAY;
	else
		mode &= ~FMODE_NDELAY;

	return blkdev_ioctl(bdev, mode, cmd, arg);
}

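/*
 * Write data to the block device.  Only intended for the block device itself
 * and the raw driver which basically is a fake block device.
 *
 * Does not take i_mutex for the write and thus is not for general purpose
 * writes.
 */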
ssize_t blkdev_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
	struct file *file = iocb->ki_filp;
	struct blk_plug plug;
	ssize_t ret;

	blk_start_plug(&plug);
	ret = __generic_file_write_iter(iocb, from);
	if (ret > 0) {
		ssize_t err;
		err = generic_write_sync(file, iocb->ki_pos - ret, ret);
		if (err < 0)
			ret = err;
	}
	blk_finish_plug(&plug);
	return ret;
}
EXPORT_SYMBOL_GPL(blkdev_write_iter);

ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
	struct file *file = iocb->ki_filp;
	struct inode *bd_inode = file->f_mapping->host;
	loff_t size = i_size_read(bd_inode);
	loff_t pos = iocb->ki_pos;

	if (pos >= size)
		return 0;

	size -= pos;
	iov_iter_truncate(to, size);
	return generic_file_read_iter(iocb, to);
}
EXPORT_SYMBOL_GPL(blkdev_read_iter);

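/*
 * Try to release a page associated with block device when the system
 * is under memory pressure.
 */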
static int blkdev_releasepage(struct page *page, gfp_t wait)
{
	struct super_block *super = BDEV_I(page->mapping->host)->bdev.bd_super;

	if (super && super->s_op->bdev_try_to_free_page)
		return super->s_op->bdev_try_to_free_page(super, page, wait);

	return try_to_free_buffers(page);
}

static const struct address_space_operations def_blk_aops = {
	.readpage	= blkdev_readpage,
	.readpages	= blkdev_readpages,
	.writepage	= blkdev_writepage,
	.write_begin	= blkdev_write_begin,
	.write_end	= blkdev_write_end,
	.writepages	= generic_writepages,
	.releasepage	= blkdev_releasepage,
	.direct_IO	= blkdev_direct_IO,
	.is_dirty_writeback = buffer_check_dirty_writeback,
};

const struct file_operations def_blk_fops = {
	.open		= blkdev_open,
	.release	= blkdev_close,
	.llseek		= block_llseek,
	.read		= new_sync_read,
	.write		= new_sync_write,
	.read_iter	= blkdev_read_iter,
	.write_iter	= blkdev_write_iter,
	.mmap		= generic_file_mmap,
	.fsync		= blkdev_fsync,
	.unlocked_ioctl	= block_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl	= compat_blkdev_ioctl,
#endif
	.splice_read	= generic_file_splice_read,
	.splice_write	= iter_file_splice_write,
};

int ioctl_by_bdev(struct block_device *bdev, unsigned cmd, unsigned long arg)
{
	int res;
	mm_segment_t old_fs = get_fs();
	set_fs(KERNEL_DS);
	res = blkdev_ioctl(bdev, 0, cmd, arg);
	set_fs(old_fs);
	return res;
}

EXPORT_SYMBOL(ioctl_by_bdev);

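/**
 * lookup_bdev  - lookup a struct block_device by name
 * @pathname:	special file representing the block device
 *
 * Get a reference to the blockdevice at @pathname in the current
 * namespace if possible and return it.  Return ERR_PTR(error)
 * otherwise.
 */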
struct block_device *lookup_bdev(const char *pathname)
{
	struct block_device *bdev;
	struct inode *inode;
	struct path path;
	int error;

	if (!pathname || !*pathname)
		return ERR_PTR(-EINVAL);

	error = kern_path(pathname, LOOKUP_FOLLOW, &path);
	if (error)
		return ERR_PTR(error);

	inode = path.dentry->d_inode;
	error = -ENOTBLK;
	if (!S_ISBLK(inode->i_mode))
		goto fail;
	error = -EACCES;
	if (path.mnt->mnt_flags & MNT_NODEV)
		goto fail;
	error = -ENOMEM;
	bdev = bd_acquire(inode);
	if (!bdev)
		goto fail;
out:
	path_put(&path);
	return bdev;
fail:
	bdev = ERR_PTR(error);
	goto out;
}
EXPORT_SYMBOL(lookup_bdev);

int __invalidate_device(struct block_device *bdev, bool kill_dirty)
{
	struct super_block *sb = get_super(bdev);
	int res = 0;

	if (sb) {
		/*
		 * no need to lock the super, get_super holds the
		 * read mutex so the filesystem cannot go away
		 * under us (->put_super runs with the write lock
		 * hold).
		 */
		shrink_dcache_sb(sb);
		res = invalidate_inodes(sb, kill_dirty);
		drop_super(sb);
	}
	invalidate_bdev(bdev);
	return res;
}
EXPORT_SYMBOL(__invalidate_device);

void iterate_bdevs(void (*func)(struct block_device *, void *), void *arg)
{
	struct inode *inode, *old_inode = NULL;

	spin_lock(&inode_sb_list_lock);
	list_for_each_entry(inode, &blockdev_superblock->s_inodes, i_sb_list) {
		struct address_space *mapping = inode->i_mapping;

		spin_lock(&inode->i_lock);
		if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW) ||
		    mapping->nrpages == 0) {
			spin_unlock(&inode->i_lock);
			continue;
		}
		__iget(inode);
		spin_unlock(&inode->i_lock);
		spin_unlock(&inode_sb_list_lock);
		/*
		 * We hold a reference to 'inode' so it couldn't have been
		 * removed from s_inodes list while we dropped the
		 * inode_sb_list_lock.  We cannot iput the inode now as we can
		 * be holding the last reference and we cannot iput it under
		 * inode_sb_list_lock. So we keep the reference and iput it
		 * later.
		 */
		iput(old_inode);
		old_inode = inode;

		func(I_BDEV(inode), arg);

		spin_lock(&inode_sb_list_lock);
	}
	spin_unlock(&inode_sb_list_lock);
	iput(old_inode);
}