/*
 *  linux/fs/block_dev.c
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 *  Copyright (C) 2001  Andrea Arcangeli <andrea@suse.de> SuSE
 */

#include <linux/init.h>
#include <linux/mm.h>
#include <linux/fcntl.h>
#include <linux/slab.h>
#include <linux/kmod.h>
#include <linux/major.h>
#include <linux/device_cgroup.h>
#include <linux/highmem.h>
#include <linux/blkdev.h>
#include <linux/module.h>
#include <linux/blkpg.h>
#include <linux/buffer_head.h>
#include <linux/swap.h>
#include <linux/pagevec.h>
#include <linux/writeback.h>
#include <linux/mpage.h>
#include <linux/mount.h>
#include <linux/uio.h>
#include <linux/namei.h>
#include <linux/log2.h>
#include <linux/cleancache.h>
#include <asm/uaccess.h>
#include "internal.h"

struct bdev_inode {
	struct block_device bdev;
	struct inode vfs_inode;
};

static const struct address_space_operations def_blk_aops;

static inline struct bdev_inode *BDEV_I(struct inode *inode)
{
	return container_of(inode, struct bdev_inode, vfs_inode);
}

inline struct block_device *I_BDEV(struct inode *inode)
{
	return &BDEV_I(inode)->bdev;
}
EXPORT_SYMBOL(I_BDEV);
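
/*
 * Move the inode from its current bdi to a new bdi.  If the inode is dirty we
 * need to move it onto the dirty list of @dst so that the inode is always on
 * the right list.
 */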
static void bdev_inode_switch_bdi(struct inode *inode,
			struct backing_dev_info *dst)
{
	struct backing_dev_info *old = inode->i_data.backing_dev_info;

	if (unlikely(dst == old))		/* deadlock avoidance */
		return;
	bdi_lock_two(&old->wb, &dst->wb);
	spin_lock(&inode->i_lock);
	inode->i_data.backing_dev_info = dst;
	if (inode->i_state & I_DIRTY)
		list_move(&inode->i_wb_list, &dst->wb.b_dirty);
	spin_unlock(&inode->i_lock);
	spin_unlock(&old->wb.list_lock);
	spin_unlock(&dst->wb.list_lock);
}

static sector_t max_block(struct block_device *bdev)
{
	sector_t retval = ~((sector_t)0);
	loff_t sz = i_size_read(bdev->bd_inode);

	if (sz) {
		unsigned int size = block_size(bdev);
		unsigned int sizebits = blksize_bits(size);
		retval = (sz >> sizebits);
	}
	return retval;
}
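
/* Kill _all_ buffers and pagecache, dirty or not.. */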
void kill_bdev(struct block_device *bdev)
{
	struct address_space *mapping = bdev->bd_inode->i_mapping;

	if (mapping->nrpages == 0)
		return;

	invalidate_bh_lrus();
	truncate_inode_pages(mapping, 0);
}
EXPORT_SYMBOL(kill_bdev);
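
/* Invalidate clean unused buffers and pagecache. */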
void invalidate_bdev(struct block_device *bdev)
{
	struct address_space *mapping = bdev->bd_inode->i_mapping;

	if (mapping->nrpages == 0)
		return;

	invalidate_bh_lrus();
	lru_add_drain_all();	/* make sure all lru add caches are flushed */
	invalidate_mapping_pages(mapping, 0, -1);
	/* 99% of the time, we don't need to flush the cleancache on the bdev.
	 * But, for the strange corners, lets be cautious
	 */
	cleancache_flush_inode(mapping);
}
EXPORT_SYMBOL(invalidate_bdev);

int set_blocksize(struct block_device *bdev, int size)
{
	/* Size must be a power of two, and between 512 and PAGE_SIZE */
	if (size > PAGE_SIZE || size < 512 || !is_power_of_2(size))
		return -EINVAL;

	/* Size cannot be smaller than the size supported by the device */
	if (size < bdev_logical_block_size(bdev))
		return -EINVAL;

	/* Don't change the size if it is same as current */
	if (bdev->bd_block_size != size) {
		sync_blockdev(bdev);
		bdev->bd_block_size = size;
		bdev->bd_inode->i_blkbits = blksize_bits(size);
		kill_bdev(bdev);
	}
	return 0;
}

EXPORT_SYMBOL(set_blocksize);

int sb_set_blocksize(struct super_block *sb, int size)
{
	if (set_blocksize(sb->s_bdev, size))
		return 0;
	/* If we get here, we know size is power of two
	 * and it's value is between 512 and PAGE_SIZE */
	sb->s_blocksize = size;
	sb->s_blocksize_bits = blksize_bits(size);
	return sb->s_blocksize;
}

EXPORT_SYMBOL(sb_set_blocksize);

int sb_min_blocksize(struct super_block *sb, int size)
{
	int minsize = bdev_logical_block_size(sb->s_bdev);
	if (size < minsize)
		size = minsize;
	return sb_set_blocksize(sb, size);
}

EXPORT_SYMBOL(sb_min_blocksize);

static int
blkdev_get_block(struct inode *inode, sector_t iblock,
		struct buffer_head *bh, int create)
{
	if (iblock >= max_block(I_BDEV(inode))) {
		if (create)
			return -EIO;

		/*
		 * for reads, we're just trying to fill a partial page.
		 * return a hole, they will have to call get_block again
		 * before they can fill it, and they will get -EIO at that
		 * time
		 */
		return 0;
	}
	bh->b_bdev = I_BDEV(inode);
	bh->b_blocknr = iblock;
	set_buffer_mapped(bh);
	return 0;
}

static int
blkdev_get_blocks(struct inode *inode, sector_t iblock,
		struct buffer_head *bh, int create)
{
	sector_t end_block = max_block(I_BDEV(inode));
	unsigned long max_blocks = bh->b_size >> inode->i_blkbits;

	if ((iblock + max_blocks) > end_block) {
		max_blocks = end_block - iblock;
		if ((long)max_blocks <= 0) {
			if (create)
				return -EIO;	/* write fully beyond EOF */
			/*
			 * It is a read which is fully beyond EOF.  We return
			 * a !buffer_mapped buffer
			 */
			max_blocks = 0;
		}
	}

	bh->b_bdev = I_BDEV(inode);
	bh->b_blocknr = iblock;
	bh->b_size = max_blocks << inode->i_blkbits;
	if (max_blocks)
		set_buffer_mapped(bh);
	return 0;
}

static ssize_t
blkdev_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
			loff_t offset, unsigned long nr_segs)
{
	struct file *file = iocb->ki_filp;
	struct inode *inode = file->f_mapping->host;

	return __blockdev_direct_IO(rw, iocb, inode, I_BDEV(inode), iov, offset,
				    nr_segs, blkdev_get_blocks, NULL, NULL, 0);
}

int __sync_blockdev(struct block_device *bdev, int wait)
{
	if (!bdev)
		return 0;
	if (!wait)
		return filemap_flush(bdev->bd_inode->i_mapping);
	return filemap_write_and_wait(bdev->bd_inode->i_mapping);
}
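
/*
 * Write out and wait upon all the dirty data associated with a block
 * device via its mapping.  Does not take the superblock lock.
 */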
int sync_blockdev(struct block_device *bdev)
{
	return __sync_blockdev(bdev, 1);
}
EXPORT_SYMBOL(sync_blockdev);
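
/*
 * Write out and wait upon all dirty data associated with this
 * device.  Filesystem data as well as the underlying block
 * device.  Takes the superblock lock.
 */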
int fsync_bdev(struct block_device *bdev)
{
	struct super_block *sb = get_super(bdev);
	if (sb) {
		int res = sync_filesystem(sb);
		drop_super(sb);
		return res;
	}
	return sync_blockdev(bdev);
}
EXPORT_SYMBOL(fsync_bdev);
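
/**
 * freeze_bdev  --  lock a filesystem and force it into a consistent state
 * @bdev:	blockdevice to lock
 *
 * If a superblock is found on this device, we take the s_umount semaphore
 * on it to make sure nobody unmounts until the snapshot creation is done.
 * The reference counter (bd_fsfreeze_count) guarantees that only the last
 * unfreeze process can unfreeze the frozen filesystem actually when multiple
 * freeze requests arrive simultaneously.  It counts up in freeze_bdev() and
 * counts down in thaw_bdev().  When it becomes 0, thaw_bdev() will unfreeze
 * actually.
 */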
struct super_block *freeze_bdev(struct block_device *bdev)
{
	struct super_block *sb;
	int error = 0;

	mutex_lock(&bdev->bd_fsfreeze_mutex);
	if (++bdev->bd_fsfreeze_count > 1) {
		/*
		 * We don't even need to grab a reference - the first call
		 * to freeze_bdev grabs an active reference and only the last
		 * thaw_bdev drops it.
		 */
		sb = get_super(bdev);
		drop_super(sb);
		mutex_unlock(&bdev->bd_fsfreeze_mutex);
		return sb;
	}

	sb = get_active_super(bdev);
	if (!sb)
		goto out;
	error = freeze_super(sb);
	if (error) {
		deactivate_super(sb);
		bdev->bd_fsfreeze_count--;
		mutex_unlock(&bdev->bd_fsfreeze_mutex);
		return ERR_PTR(error);
	}
	deactivate_super(sb);
 out:
	sync_blockdev(bdev);
	mutex_unlock(&bdev->bd_fsfreeze_mutex);
	return sb;	/* thaw_bdev releases s->s_umount */
}
EXPORT_SYMBOL(freeze_bdev);
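
/**
 * thaw_bdev  -- unlock filesystem
 * @bdev:	blockdevice to unlock
 * @sb:		associated superblock
 *
 * Unlocks the filesystem and marks it writeable again after freeze_bdev().
 */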
int thaw_bdev(struct block_device *bdev, struct super_block *sb)
{
	int error = -EINVAL;

	mutex_lock(&bdev->bd_fsfreeze_mutex);
	if (!bdev->bd_fsfreeze_count)
		goto out;

	error = 0;
	if (--bdev->bd_fsfreeze_count > 0)
		goto out;

	if (!sb)
		goto out;

	error = thaw_super(sb);
	if (error) {
		bdev->bd_fsfreeze_count++;
		mutex_unlock(&bdev->bd_fsfreeze_mutex);
		return error;
	}
out:
	mutex_unlock(&bdev->bd_fsfreeze_mutex);
	return 0;
}
EXPORT_SYMBOL(thaw_bdev);

static int blkdev_writepage(struct page *page, struct writeback_control *wbc)
{
	return block_write_full_page(page, blkdev_get_block, wbc);
}

static int blkdev_readpage(struct file *file, struct page *page)
{
	return block_read_full_page(page, blkdev_get_block);
}

static int blkdev_write_begin(struct file *file, struct address_space *mapping,
			loff_t pos, unsigned len, unsigned flags,
			struct page **pagep, void **fsdata)
{
	return block_write_begin(mapping, pos, len, flags, pagep,
				 blkdev_get_block);
}

static int blkdev_write_end(struct file *file, struct address_space *mapping,
			loff_t pos, unsigned len, unsigned copied,
			struct page *page, void *fsdata)
{
	int ret;
	ret = block_write_end(file, mapping, pos, len, copied, page, fsdata);

	unlock_page(page);
	page_cache_release(page);

	return ret;
}
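
/*
 * private llseek:
 * for a block special file file->f_path.dentry->d_inode->i_size is zero
 * as well, so we compute the size by hand (just as in block_read/write above).
 */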
static loff_t block_llseek(struct file *file, loff_t offset, int origin)
{
	struct inode *bd_inode = file->f_mapping->host;
	loff_t size;
	loff_t retval;

	mutex_lock(&bd_inode->i_mutex);
	size = i_size_read(bd_inode);

	retval = -EINVAL;
	switch (origin) {
		case SEEK_END:
			offset += size;
			break;
		case SEEK_CUR:
			offset += file->f_pos;
			/* fall through */
		case SEEK_SET:
			break;
		default:
			goto out;
	}
	if (offset >= 0 && offset <= size) {
		if (offset != file->f_pos) {
			file->f_pos = offset;
		}
		retval = offset;
	}
out:
	mutex_unlock(&bd_inode->i_mutex);
	return retval;
}

int blkdev_fsync(struct file *filp, loff_t start, loff_t end, int datasync)
{
	struct inode *bd_inode = filp->f_mapping->host;
	struct block_device *bdev = I_BDEV(bd_inode);
	int error;

	error = filemap_write_and_wait_range(filp->f_mapping, start, end);
	if (error)
		return error;

	/*
	 * There is no need to serialise calls to blkdev_issue_flush with
	 * i_mutex and doing so causes performance issues with concurrent
	 * O_SYNC writers to a block device.
	 */
	error = blkdev_issue_flush(bdev, GFP_KERNEL, NULL);
	if (error == -EOPNOTSUPP)
		error = 0;

	return error;
}
EXPORT_SYMBOL(blkdev_fsync);
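
/*
 * pseudo-fs
 */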
static __cacheline_aligned_in_smp DEFINE_SPINLOCK(bdev_lock);
static struct kmem_cache *bdev_cachep __read_mostly;

static struct inode *bdev_alloc_inode(struct super_block *sb)
{
	struct bdev_inode *ei = kmem_cache_alloc(bdev_cachep, GFP_KERNEL);
	if (!ei)
		return NULL;
	return &ei->vfs_inode;
}

static void bdev_i_callback(struct rcu_head *head)
{
	struct inode *inode = container_of(head, struct inode, i_rcu);
	struct bdev_inode *bdi = BDEV_I(inode);

	kmem_cache_free(bdev_cachep, bdi);
}

static void bdev_destroy_inode(struct inode *inode)
{
	call_rcu(&inode->i_rcu, bdev_i_callback);
}

static void init_once(void *foo)
{
	struct bdev_inode *ei = (struct bdev_inode *) foo;
	struct block_device *bdev = &ei->bdev;

	memset(bdev, 0, sizeof(*bdev));
	mutex_init(&bdev->bd_mutex);
	INIT_LIST_HEAD(&bdev->bd_inodes);
	INIT_LIST_HEAD(&bdev->bd_list);
#ifdef CONFIG_SYSFS
	INIT_LIST_HEAD(&bdev->bd_holder_disks);
#endif
	inode_init_once(&ei->vfs_inode);
	/* Initialize mutex for freeze. */
	mutex_init(&bdev->bd_fsfreeze_mutex);
}

static inline void __bd_forget(struct inode *inode)
{
	list_del_init(&inode->i_devices);
	inode->i_bdev = NULL;
	inode->i_mapping = &inode->i_data;
}

static void bdev_evict_inode(struct inode *inode)
{
	struct block_device *bdev = &BDEV_I(inode)->bdev;
	struct list_head *p;
	truncate_inode_pages(&inode->i_data, 0);
	invalidate_inode_buffers(inode); /* is it needed here? */
	end_writeback(inode);
	spin_lock(&bdev_lock);
	while ((p = bdev->bd_inodes.next) != &bdev->bd_inodes) {
		__bd_forget(list_entry(p, struct inode, i_devices));
	}
	list_del_init(&bdev->bd_list);
	spin_unlock(&bdev_lock);
}

static const struct super_operations bdev_sops = {
	.statfs = simple_statfs,
	.alloc_inode = bdev_alloc_inode,
	.destroy_inode = bdev_destroy_inode,
	.drop_inode = generic_delete_inode,
	.evict_inode = bdev_evict_inode,
};

static struct dentry *bd_mount(struct file_system_type *fs_type,
	int flags, const char *dev_name, void *data)
{
	/* 0x62646576 is "bdev" in ASCII (BDEVFS_MAGIC) */
	return mount_pseudo(fs_type, "bdev:", &bdev_sops, NULL, 0x62646576);
}

static struct file_system_type bd_type = {
	.name		= "bdev",
	.mount		= bd_mount,
	.kill_sb	= kill_anon_super,
};

static struct super_block *blockdev_superblock __read_mostly;

void __init bdev_cache_init(void)
{
	int err;
	static struct vfsmount *bd_mnt;

	bdev_cachep = kmem_cache_create("bdev_cache", sizeof(struct bdev_inode),
			0, (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|
				SLAB_MEM_SPREAD|SLAB_PANIC),
			init_once);
	err = register_filesystem(&bd_type);
	if (err)
		panic("Cannot register bdev pseudo-fs");
	bd_mnt = kern_mount(&bd_type);
	if (IS_ERR(bd_mnt))
		panic("Cannot create bdev pseudo-fs");
	blockdev_superblock = bd_mnt->mnt_sb;	/* For writeback */
}
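
/*
 * Most likely _very_ bad one - but then it's hardly critical for small
 * /dev and can be fixed when somebody will need really large one.
 * Keep in mind that it will be fed through icache hash function too.
 */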
static inline unsigned long hash(dev_t dev)
{
	return MAJOR(dev)+MINOR(dev);
}

static int bdev_test(struct inode *inode, void *data)
{
	return BDEV_I(inode)->bdev.bd_dev == *(dev_t *)data;
}

static int bdev_set(struct inode *inode, void *data)
{
	BDEV_I(inode)->bdev.bd_dev = *(dev_t *)data;
	return 0;
}

static LIST_HEAD(all_bdevs);

struct block_device *bdget(dev_t dev)
{
	struct block_device *bdev;
	struct inode *inode;

	inode = iget5_locked(blockdev_superblock, hash(dev),
			bdev_test, bdev_set, &dev);

	if (!inode)
		return NULL;

	bdev = &BDEV_I(inode)->bdev;

	if (inode->i_state & I_NEW) {
		bdev->bd_contains = NULL;
		bdev->bd_super = NULL;
		bdev->bd_inode = inode;
		bdev->bd_block_size = (1 << inode->i_blkbits);
		bdev->bd_part_count = 0;
		bdev->bd_invalidated = 0;
		inode->i_mode = S_IFBLK;
		inode->i_rdev = dev;
		inode->i_bdev = bdev;
		inode->i_data.a_ops = &def_blk_aops;
		mapping_set_gfp_mask(&inode->i_data, GFP_USER);
		inode->i_data.backing_dev_info = &default_backing_dev_info;
		spin_lock(&bdev_lock);
		list_add(&bdev->bd_list, &all_bdevs);
		spin_unlock(&bdev_lock);
		unlock_new_inode(inode);
	}
	return bdev;
}

EXPORT_SYMBOL(bdget);
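
/**
 * bdgrab -- Grab a reference to an already referenced block device
 * @bdev:	Block device to grab a reference to.
 */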
struct block_device *bdgrab(struct block_device *bdev)
{
	ihold(bdev->bd_inode);
	return bdev;
}

long nr_blockdev_pages(void)
{
	struct block_device *bdev;
	long ret = 0;
	spin_lock(&bdev_lock);
	list_for_each_entry(bdev, &all_bdevs, bd_list) {
		ret += bdev->bd_inode->i_mapping->nrpages;
	}
	spin_unlock(&bdev_lock);
	return ret;
}

void bdput(struct block_device *bdev)
{
	iput(bdev->bd_inode);
}

EXPORT_SYMBOL(bdput);

static struct block_device *bd_acquire(struct inode *inode)
{
	struct block_device *bdev;

	spin_lock(&bdev_lock);
	bdev = inode->i_bdev;
	if (bdev) {
		ihold(bdev->bd_inode);
		spin_unlock(&bdev_lock);
		return bdev;
	}
	spin_unlock(&bdev_lock);

	bdev = bdget(inode->i_rdev);
	if (bdev) {
		spin_lock(&bdev_lock);
		if (!inode->i_bdev) {
			/*
			 * We take an additional reference to bd_inode,
			 * and it's released in clear_inode() of inode.
			 * So, we can access it via ->i_mapping always
			 * without igrab().
			 */
			ihold(bdev->bd_inode);
			inode->i_bdev = bdev;
			inode->i_mapping = bdev->bd_inode->i_mapping;
			list_add(&inode->i_devices, &bdev->bd_inodes);
		}
		spin_unlock(&bdev_lock);
	}
	return bdev;
}

static inline int sb_is_blkdev_sb(struct super_block *sb)
{
	return sb == blockdev_superblock;
}

/* Call when you free inode */

void bd_forget(struct inode *inode)
{
	struct block_device *bdev = NULL;

	spin_lock(&bdev_lock);
	if (inode->i_bdev) {
		if (!sb_is_blkdev_sb(inode->i_sb))
			bdev = inode->i_bdev;
		__bd_forget(inode);
	}
	spin_unlock(&bdev_lock);

	if (bdev)
		iput(bdev->bd_inode);
}
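
/**
 * bd_may_claim - test whether a block device can be claimed
 * @bdev: block device of interest
 * @whole: whole block device containing @bdev, may equal @bdev
 * @holder: holder trying to claim @bdev
 *
 * Test whether @bdev can be claimed by @holder.
 *
 * CONTEXT:
 * spin_lock(&bdev_lock).
 *
 * RETURNS:
 * %true if @bdev can be claimed, %false otherwise.
 */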
static bool bd_may_claim(struct block_device *bdev, struct block_device *whole,
			 void *holder)
{
	if (bdev->bd_holder == holder)
		return true;	 /* already a holder */
	else if (bdev->bd_holder != NULL)
		return false;	 /* held by someone else */
	else if (bdev->bd_contains == bdev)
		return true;	 /* is a whole device which isn't held */

	else if (whole->bd_holder == bd_may_claim)
		return true;	 /* is a partition of a device that is being partitioned */
	else if (whole->bd_holder != NULL)
		return false;	 /* is a partition of a held device */
	else
		return true;	 /* is a partition of an un-held device */
}
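
/**
 * bd_prepare_to_claim - prepare to claim a block device
 * @bdev: block device of interest
 * @whole: the whole device containing @bdev, may equal @bdev
 * @holder: holder trying to claim @bdev
 *
 * Prepare to claim @bdev.  This function fails if @bdev is already
 * claimed by another holder and waits if another claiming is in
 * progress.  This function doesn't actually claim.  When successful,
 * the caller has the claiming right and is responsible for finishing
 * the claiming.
 *
 * CONTEXT:
 * spin_lock(&bdev_lock).  Might release bdev_lock, sleep and regrab
 * it multiple times.
 *
 * RETURNS:
 * 0 if @bdev can be claimed, -EBUSY otherwise.
 */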
static int bd_prepare_to_claim(struct block_device *bdev,
			       struct block_device *whole, void *holder)
{
retry:
	/* if someone else claimed, fail */
	if (!bd_may_claim(bdev, whole, holder))
		return -EBUSY;

	/* if claiming is already in progress, wait for it to finish */
	if (whole->bd_claiming) {
		wait_queue_head_t *wq = bit_waitqueue(&whole->bd_claiming, 0);
		DEFINE_WAIT(wait);

		prepare_to_wait(wq, &wait, TASK_UNINTERRUPTIBLE);
		spin_unlock(&bdev_lock);
		schedule();
		finish_wait(wq, &wait);
		spin_lock(&bdev_lock);
		goto retry;
	}

	/* yay, all mine */
	return 0;
}
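
/**
 * bd_start_claiming - start claiming a block device
 * @bdev: block device of interest
 * @holder: holder trying to claim @bdev
 *
 * @bdev is about to be opened exclusively.  Check @bdev can be opened
 * exclusively and mark that an exclusive open is in progress.  Each
 * successful call to this function must be matched by a later finish
 * or abort of the claim (see blkdev_get()).
 *
 * CONTEXT:
 * Might sleep.
 *
 * RETURNS:
 * Pointer to the block device containing @bdev on success, ERR_PTR()
 * value on failure.
 */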
static struct block_device *bd_start_claiming(struct block_device *bdev,
					      void *holder)
{
	struct gendisk *disk;
	struct block_device *whole;
	int partno, err;

	might_sleep();

	/*
	 * @bdev might not have been initialized properly yet, look up
	 * and grab the outer block device the hard way.
	 */
	disk = get_gendisk(bdev->bd_dev, &partno);
	if (!disk)
		return ERR_PTR(-ENXIO);

	/*
	 * Normally, @bdev should equal what's returned from bdget_disk()
	 * if partno is 0; however, some drivers (floppy) use multiple
	 * bdev's for the same physical device and @bdev may be one of the
	 * aliases.  Keep @bdev if partno is 0.  This means claimer
	 * tracking is broken for those devices but it has always been that
	 * way.
	 */
	if (partno)
		whole = bdget_disk(disk, 0);
	else
		whole = bdgrab(bdev);

	module_put(disk->fops->owner);
	put_disk(disk);
	if (!whole)
		return ERR_PTR(-ENOMEM);

	/* prepare to claim, if successful, mark claiming in progress */
	spin_lock(&bdev_lock);

	err = bd_prepare_to_claim(bdev, whole, holder);
	if (err == 0) {
		whole->bd_claiming = holder;
		spin_unlock(&bdev_lock);
		return whole;
	} else {
		spin_unlock(&bdev_lock);
		bdput(whole);
		return ERR_PTR(err);
	}
}

#ifdef CONFIG_SYSFS
struct bd_holder_disk {
	struct list_head	list;
	struct gendisk		*disk;
	int			refcnt;
};

static struct bd_holder_disk *bd_find_holder_disk(struct block_device *bdev,
						  struct gendisk *disk)
{
	struct bd_holder_disk *holder;

	list_for_each_entry(holder, &bdev->bd_holder_disks, list)
		if (holder->disk == disk)
			return holder;
	return NULL;
}

static int add_symlink(struct kobject *from, struct kobject *to)
{
	return sysfs_create_link(from, to, kobject_name(to));
}

static void del_symlink(struct kobject *from, struct kobject *to)
{
	sysfs_remove_link(from, kobject_name(to));
}
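
/**
 * bd_link_disk_holder - create symlinks between holding disk and slave bdev
 * @bdev: the claimed slave bdev
 * @disk: the holding disk
 *
 * DON'T USE THIS UNLESS YOU'RE ALREADY USING IT.
 *
 * This function creates the following sysfs symlinks.
 *
 * - from "slaves" directory of the holder @disk to the claimed @bdev
 * - from "holders" directory of the @bdev to the holder @disk
 *
 * For example, if /dev/dm-0 maps to /dev/sda and disk for dm-0 is
 * passed to bd_link_disk_holder(), then:
 *
 *   /sys/block/dm-0/slaves/sda --> /sys/block/sda
 *   /sys/block/sda/holders/dm-0 --> /sys/block/dm-0
 *
 * The caller must have claimed @bdev before calling this function and
 * ensure that both @bdev and @disk are valid during the creation and
 * lifetime of these symlinks.
 *
 * CONTEXT:
 * Might sleep.
 *
 * RETURNS:
 * 0 on success, -errno on failure.
 */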
int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk)
{
	struct bd_holder_disk *holder;
	int ret = 0;

	mutex_lock(&bdev->bd_mutex);

	WARN_ON_ONCE(!bdev->bd_holder);

	/* FIXME: remove the following once add_disk() handles errors */
	if (WARN_ON(!disk->slave_dir || !bdev->bd_part->holder_dir))
		goto out_unlock;

	holder = bd_find_holder_disk(bdev, disk);
	if (holder) {
		holder->refcnt++;
		goto out_unlock;
	}

	holder = kzalloc(sizeof(*holder), GFP_KERNEL);
	if (!holder) {
		ret = -ENOMEM;
		goto out_unlock;
	}

	INIT_LIST_HEAD(&holder->list);
	holder->disk = disk;
	holder->refcnt = 1;

	ret = add_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj);
	if (ret)
		goto out_free;

	ret = add_symlink(bdev->bd_part->holder_dir, &disk_to_dev(disk)->kobj);
	if (ret)
		goto out_del;
	/*
	 * bdev could be deleted beneath us which would implicitly destroy
	 * the holder directory.  Hold on to it.
	 */
	kobject_get(bdev->bd_part->holder_dir);

	list_add(&holder->list, &bdev->bd_holder_disks);
	goto out_unlock;

out_del:
	del_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj);
out_free:
	kfree(holder);
out_unlock:
	mutex_unlock(&bdev->bd_mutex);
	return ret;
}
EXPORT_SYMBOL_GPL(bd_link_disk_holder);
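
/**
 * bd_unlink_disk_holder - destroy symlinks created by bd_link_disk_holder()
 * @bdev: the claimed slave bdev
 * @disk: the holding disk
 *
 * DON'T USE THIS UNLESS YOU'RE ALREADY USING IT.
 *
 * CONTEXT:
 * Might sleep.
 */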
void bd_unlink_disk_holder(struct block_device *bdev, struct gendisk *disk)
{
	struct bd_holder_disk *holder;

	mutex_lock(&bdev->bd_mutex);

	holder = bd_find_holder_disk(bdev, disk);

	if (!WARN_ON_ONCE(holder == NULL) && !--holder->refcnt) {
		del_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj);
		del_symlink(bdev->bd_part->holder_dir,
			    &disk_to_dev(disk)->kobj);
		kobject_put(bdev->bd_part->holder_dir);
		list_del_init(&holder->list);
		kfree(holder);
	}

	mutex_unlock(&bdev->bd_mutex);
}
EXPORT_SYMBOL_GPL(bd_unlink_disk_holder);
#endif
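
/**
 * flush_disk - invalidates all buffer-cache entries on a disk
 * @bdev:	struct block device to be flushed
 * @kill_dirty: flag to guide handling of dirty inodes
 *
 * Invalidates all buffer-cache entries on a disk.  It should be called
 * when a disk has been changed -- either by a media change or online
 * resize.
 */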
static void flush_disk(struct block_device *bdev, bool kill_dirty)
{
	if (__invalidate_device(bdev, kill_dirty)) {
		char name[BDEVNAME_SIZE] = "";

		if (bdev->bd_disk)
			disk_name(bdev->bd_disk, 0, name);
		printk(KERN_WARNING "VFS: busy inodes on changed media or "
		       "resized disk %s\n", name);
	}

	if (!bdev->bd_disk)
		return;
	if (disk_part_scan_enabled(bdev->bd_disk))
		bdev->bd_invalidated = 1;
}
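
/**
 * check_disk_size_change - checks for a disk size change and adjusts bdev size.
 * @disk: struct gendisk to check
 * @bdev: struct bdev to adjust.
 *
 * This routine checks to see if the bdev size does not match the disk size
 * and adjusts it if it differs.
 */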
void check_disk_size_change(struct gendisk *disk, struct block_device *bdev)
{
	loff_t disk_size, bdev_size;

	disk_size = (loff_t)get_capacity(disk) << 9;
	bdev_size = i_size_read(bdev->bd_inode);
	if (disk_size != bdev_size) {
		char name[BDEVNAME_SIZE];

		disk_name(disk, 0, name);
		printk(KERN_INFO
		       "%s: detected capacity change from %lld to %lld\n",
		       name, bdev_size, disk_size);
		i_size_write(bdev->bd_inode, disk_size);
		flush_disk(bdev, false);
	}
}
EXPORT_SYMBOL(check_disk_size_change);
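
/**
 * revalidate_disk - wrapper for lower-level driver's revalidate_disk call-back
 * @disk: struct gendisk to be revalidated
 *
 * This routine is a wrapper for lower-level driver's revalidate_disk
 * call-backs.  It is used to do common pre and post operations needed
 * for all revalidate_disk operations.
 */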
int revalidate_disk(struct gendisk *disk)
{
	struct block_device *bdev;
	int ret = 0;

	if (disk->fops->revalidate_disk)
		ret = disk->fops->revalidate_disk(disk);

	bdev = bdget_disk(disk, 0);
	if (!bdev)
		return ret;

	mutex_lock(&bdev->bd_mutex);
	check_disk_size_change(disk, bdev);
	mutex_unlock(&bdev->bd_mutex);
	bdput(bdev);
	return ret;
}
EXPORT_SYMBOL(revalidate_disk);
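
/**
 * check_disk_change - check whether a removable media has been changed
 * @bdev: block device to check
 *
 * Check whether a removable media has been changed and, if so, flush
 * all cached state for the disk and ask the driver to revalidate it.
 *
 * RETURNS:
 * %1 if the media has changed, %0 otherwise.
 */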
int check_disk_change(struct block_device *bdev)
{
	struct gendisk *disk = bdev->bd_disk;
	const struct block_device_operations *bdops = disk->fops;
	unsigned int events;

	events = disk_clear_events(disk, DISK_EVENT_MEDIA_CHANGE |
				   DISK_EVENT_EJECT_REQUEST);
	if (!(events & DISK_EVENT_MEDIA_CHANGE))
		return 0;

	flush_disk(bdev, true);
	if (bdops->revalidate_disk)
		bdops->revalidate_disk(bdev->bd_disk);
	return 1;
}

EXPORT_SYMBOL(check_disk_change);

void bd_set_size(struct block_device *bdev, loff_t size)
{
	unsigned bsize = bdev_logical_block_size(bdev);

	bdev->bd_inode->i_size = size;
	while (bsize < PAGE_CACHE_SIZE) {
		if (size & bsize)
			break;
		bsize <<= 1;
	}
	bdev->bd_block_size = bsize;
	bdev->bd_inode->i_blkbits = blksize_bits(bsize);
}
EXPORT_SYMBOL(bd_set_size);

static int __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part);
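
/*
 * bd_mutex locking:
 *
 *  mutex_lock(part->bd_mutex)
 *    mutex_lock_nested(whole->bd_mutex, 1)
 */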
static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
{
	struct gendisk *disk;
	struct module *owner;
	int ret;
	int partno;
	int perm = 0;

	if (mode & FMODE_READ)
		perm |= MAY_READ;
	if (mode & FMODE_WRITE)
		perm |= MAY_WRITE;
	/*
	 * hooks: /n/, see "layering violations".
	 */
	if (!for_part) {
		ret = devcgroup_inode_permission(bdev->bd_inode, perm);
		if (ret != 0) {
			bdput(bdev);
			return ret;
		}
	}

 restart:

	ret = -ENXIO;
	disk = get_gendisk(bdev->bd_dev, &partno);
	if (!disk)
		goto out;
	owner = disk->fops->owner;

	disk_block_events(disk);
	mutex_lock_nested(&bdev->bd_mutex, for_part);
	if (!bdev->bd_openers) {
		bdev->bd_disk = disk;
		bdev->bd_queue = disk->queue;
		bdev->bd_contains = bdev;
		if (!partno) {
			struct backing_dev_info *bdi;

			ret = -ENXIO;
			bdev->bd_part = disk_get_part(disk, partno);
			if (!bdev->bd_part)
				goto out_clear;

			ret = 0;
			if (disk->fops->open) {
				ret = disk->fops->open(bdev, mode);
				if (ret == -ERESTARTSYS) {
					/* Lost a race with 'disk' being
					 * deleted, try again.
					 * See md.c
					 */
					disk_put_part(bdev->bd_part);
					bdev->bd_part = NULL;
					bdev->bd_disk = NULL;
					bdev->bd_queue = NULL;
					mutex_unlock(&bdev->bd_mutex);
					disk_unblock_events(disk);
					put_disk(disk);
					module_put(owner);
					goto restart;
				}
			}

			if (!ret && !bdev->bd_openers) {
				bd_set_size(bdev,(loff_t)get_capacity(disk)<<9);
				bdi = blk_get_backing_dev_info(bdev);
				if (bdi == NULL)
					bdi = &default_backing_dev_info;
				bdev_inode_switch_bdi(bdev->bd_inode, bdi);
			}

			/*
			 * If the device is invalidated, rescan partition
			 * if open succeeded or failed with -ENOMEDIUM.
			 * The latter is necessary to prevent ghost
			 * partitions on a removed medium.
			 */
			if (bdev->bd_invalidated) {
				if (!ret)
					rescan_partitions(disk, bdev);
				else if (ret == -ENOMEDIUM)
					invalidate_partitions(disk, bdev);
			}
			if (ret)
				goto out_clear;
		} else {
			struct block_device *whole;
			whole = bdget_disk(disk, 0);
			ret = -ENOMEM;
			if (!whole)
				goto out_clear;
			BUG_ON(for_part);
			ret = __blkdev_get(whole, mode, 1);
			if (ret)
				goto out_clear;
			bdev->bd_contains = whole;
			bdev_inode_switch_bdi(bdev->bd_inode,
				whole->bd_inode->i_data.backing_dev_info);
			bdev->bd_part = disk_get_part(disk, partno);
			if (!(disk->flags & GENHD_FL_UP) ||
			    !bdev->bd_part || !bdev->bd_part->nr_sects) {
				ret = -ENXIO;
				goto out_clear;
			}
			bd_set_size(bdev, (loff_t)bdev->bd_part->nr_sects << 9);
		}
	} else {
		if (bdev->bd_contains == bdev) {
			ret = 0;
			if (bdev->bd_disk->fops->open)
				ret = bdev->bd_disk->fops->open(bdev, mode);
			/* the same as first opener case, read comment there */
			if (bdev->bd_invalidated) {
				if (!ret)
					rescan_partitions(bdev->bd_disk, bdev);
				else if (ret == -ENOMEDIUM)
					invalidate_partitions(bdev->bd_disk, bdev);
			}
			if (ret)
				goto out_unlock_bdev;
		}
		/* only one opener holds refs to the module and disk */
		put_disk(disk);
		module_put(owner);
	}
	bdev->bd_openers++;
	if (for_part)
		bdev->bd_part_count++;
	mutex_unlock(&bdev->bd_mutex);
	disk_unblock_events(disk);
	return 0;

 out_clear:
	disk_put_part(bdev->bd_part);
	bdev->bd_disk = NULL;
	bdev->bd_part = NULL;
	bdev->bd_queue = NULL;
	bdev_inode_switch_bdi(bdev->bd_inode, &default_backing_dev_info);
	if (bdev != bdev->bd_contains)
		__blkdev_put(bdev->bd_contains, mode, 1);
	bdev->bd_contains = NULL;
 out_unlock_bdev:
	mutex_unlock(&bdev->bd_mutex);
	disk_unblock_events(disk);
	put_disk(disk);
	module_put(owner);
 out:
	bdput(bdev);

	return ret;
}
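
/**
 * blkdev_get - open a block device
 * @bdev: block_device to open
 * @mode: FMODE_* mask
 * @holder: exclusive holder identifier
 *
 * Open @bdev with @mode.  If @mode includes %FMODE_EXCL, @bdev is
 * open with exclusive access.  Specifying %FMODE_EXCL with %NULL
 * @holder is invalid.  Exclusive opens may nest for the same @holder.
 *
 * On success, the reference count of @bdev is unchanged.  On failure,
 * @bdev is put.
 *
 * CONTEXT:
 * Might sleep.
 *
 * RETURNS:
 * 0 on success, -errno on failure.
 */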
int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder)
{
	struct block_device *whole = NULL;
	int res;

	WARN_ON_ONCE((mode & FMODE_EXCL) && !holder);

	if ((mode & FMODE_EXCL) && holder) {
		whole = bd_start_claiming(bdev, holder);
		if (IS_ERR(whole)) {
			bdput(bdev);
			return PTR_ERR(whole);
		}
	}

	res = __blkdev_get(bdev, mode, 0);

	if (whole) {
		struct gendisk *disk = whole->bd_disk;

		/* finish claiming */
		mutex_lock(&bdev->bd_mutex);
		spin_lock(&bdev_lock);

		if (!res) {
			BUG_ON(!bd_may_claim(bdev, whole, holder));
			/*
			 * Note that for a whole device bd_holders
			 * will be incremented twice, and bd_holder
			 * will be set to bd_may_claim before being
			 * set to holder
			 */
			whole->bd_holders++;
			whole->bd_holder = bd_may_claim;
			bdev->bd_holders++;
			bdev->bd_holder = holder;
		}

		/* tell others that we're done */
		BUG_ON(whole->bd_claiming != holder);
		whole->bd_claiming = NULL;
		wake_up_bit(&whole->bd_claiming, 0);

		spin_unlock(&bdev_lock);

		/*
		 * Block event polling for write claims if requested.  Any
		 * write holder makes the write_holder state stick until
		 * all are released.  This is good enough and tracking
		 * individual writeable reference is too fragile given the
		 * way @mode is used in blkdev_get/put().
		 */
		if (!res && (mode & FMODE_WRITE) && !bdev->bd_write_holder &&
		    (disk->flags & GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE)) {
			bdev->bd_write_holder = true;
			disk_block_events(disk);
		}

		mutex_unlock(&bdev->bd_mutex);
		bdput(whole);
	}

	return res;
}
EXPORT_SYMBOL(blkdev_get);
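
/**
 * blkdev_get_by_path - open a block device by name
 * @path: path to the block device to open
 * @mode: FMODE_* mask
 * @holder: exclusive holder identifier
 *
 * Open the blockdevice described by the device file at @path.  @mode
 * and @holder are identical to blkdev_get().
 *
 * On success, the returned block_device has reference count of one.
 *
 * CONTEXT:
 * Might sleep.
 *
 * RETURNS:
 * Pointer to block_device on success, ERR_PTR(-errno) on failure.
 */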
struct block_device *blkdev_get_by_path(const char *path, fmode_t mode,
					void *holder)
{
	struct block_device *bdev;
	int err;

	bdev = lookup_bdev(path);
	if (IS_ERR(bdev))
		return bdev;

	err = blkdev_get(bdev, mode, holder);
	if (err)
		return ERR_PTR(err);

	if ((mode & FMODE_WRITE) && bdev_read_only(bdev)) {
		blkdev_put(bdev, mode);
		return ERR_PTR(-EACCES);
	}

	return bdev;
}
EXPORT_SYMBOL(blkdev_get_by_path);
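
/**
 * blkdev_get_by_dev - open a block device by device number
 * @dev: device number of block device to open
 * @mode: FMODE_* mask
 * @holder: exclusive holder identifier
 *
 * Use it ONLY if you really do not have anything better - i.e. when
 * you are behind a truly sucky interface and all you are given is a
 * device number.  _Never_ to be used for internal purposes.  If you
 * ever need it - reconsider your API.
 *
 * On success, the returned block_device has reference count of one.
 *
 * CONTEXT:
 * Might sleep.
 *
 * RETURNS:
 * Pointer to block_device on success, ERR_PTR(-errno) on failure.
 */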
struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder)
{
	struct block_device *bdev;
	int err;

	bdev = bdget(dev);
	if (!bdev)
		return ERR_PTR(-ENOMEM);

	err = blkdev_get(bdev, mode, holder);
	if (err)
		return ERR_PTR(err);

	return bdev;
}
EXPORT_SYMBOL(blkdev_get_by_dev);

static int blkdev_open(struct inode *inode, struct file *filp)
{
	struct block_device *bdev;

	/*
	 * Preserve backwards compatibility and allow large file access
	 * even if userspace doesn't ask for it explicitly. Some mkfs
	 * binary needs it. We might want to drop this workaround
	 * during an unstable branch.
	 */
	filp->f_flags |= O_LARGEFILE;

	if (filp->f_flags & O_NDELAY)
		filp->f_mode |= FMODE_NDELAY;
	if (filp->f_flags & O_EXCL)
		filp->f_mode |= FMODE_EXCL;
	if ((filp->f_flags & O_ACCMODE) == 3)
		filp->f_mode |= FMODE_WRITE_IOCTL;

	bdev = bd_acquire(inode);
	if (bdev == NULL)
		return -ENOMEM;

	filp->f_mapping = bdev->bd_inode->i_mapping;

	return blkdev_get(bdev, filp->f_mode, filp);
}

static int __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part)
{
	int ret = 0;
	struct gendisk *disk = bdev->bd_disk;
	struct block_device *victim = NULL;

	mutex_lock_nested(&bdev->bd_mutex, for_part);
	if (for_part)
		bdev->bd_part_count--;

	if (!--bdev->bd_openers) {
		WARN_ON_ONCE(bdev->bd_holders);
		sync_blockdev(bdev);
		kill_bdev(bdev);
		/* ->release can cause the old bdi to disappear,
		 * so must switch it out first
		 */
		bdev_inode_switch_bdi(bdev->bd_inode,
					&default_backing_dev_info);
	}
	if (bdev->bd_contains == bdev) {
		if (disk->fops->release)
			ret = disk->fops->release(disk, mode);
	}
	if (!bdev->bd_openers) {
		struct module *owner = disk->fops->owner;

		disk_put_part(bdev->bd_part);
		bdev->bd_part = NULL;
		bdev->bd_disk = NULL;
		if (bdev != bdev->bd_contains)
			victim = bdev->bd_contains;
		bdev->bd_contains = NULL;

		put_disk(disk);
		module_put(owner);
	}
	mutex_unlock(&bdev->bd_mutex);
	bdput(bdev);
	if (victim)
		__blkdev_put(victim, mode, 1);
	return ret;
}

int blkdev_put(struct block_device *bdev, fmode_t mode)
{
	mutex_lock(&bdev->bd_mutex);

	if (mode & FMODE_EXCL) {
		bool bdev_free;

		/*
		 * Release a claim on the device.  The holder fields
		 * are protected with bdev_lock.  bd_mutex is to
		 * synchronize disk_holder unlinking.
		 */
		spin_lock(&bdev_lock);

		WARN_ON_ONCE(--bdev->bd_holders < 0);
		WARN_ON_ONCE(--bdev->bd_contains->bd_holders < 0);

		/* bd_contains might point to self, check in a separate step */
		if ((bdev_free = !bdev->bd_holders))
			bdev->bd_holder = NULL;
		if (!bdev->bd_contains->bd_holders)
			bdev->bd_contains->bd_holder = NULL;

		spin_unlock(&bdev_lock);

		/*
		 * If this was the last claim, remove holder link and
		 * unblock evpoll if it was a write holder.
		 */
		if (bdev_free && bdev->bd_write_holder) {
			disk_unblock_events(bdev->bd_disk);
			bdev->bd_write_holder = false;
		}
	}

	/*
	 * Trigger event checking and tell drivers to flush MEDIA_CHANGE
	 * event.  This is to ensure detection of media removal commanded
	 * from userland - e.g. eject(1).
	 */
	disk_flush_events(bdev->bd_disk, DISK_EVENT_MEDIA_CHANGE);

	mutex_unlock(&bdev->bd_mutex);

	return __blkdev_put(bdev, mode, 0);
}
EXPORT_SYMBOL(blkdev_put);

static int blkdev_close(struct inode *inode, struct file *filp)
{
	struct block_device *bdev = I_BDEV(filp->f_mapping->host);

	return blkdev_put(bdev, filp->f_mode);
}

static long block_ioctl(struct file *file, unsigned cmd, unsigned long arg)
{
	struct block_device *bdev = I_BDEV(file->f_mapping->host);
	fmode_t mode = file->f_mode;

	/*
	 * O_NDELAY can be altered using fcntl(.., F_SETFL, ..), so we have
	 * to update it before every ioctl.
	 */
	if (file->f_flags & O_NDELAY)
		mode |= FMODE_NDELAY;
	else
		mode &= ~FMODE_NDELAY;

	return blkdev_ioctl(bdev, mode, cmd, arg);
}
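
/*
 * Write data to the block device.  Only intended for the block device itself
 * and the raw driver which basically is a fake block device.
 *
 * Does not take i_mutex for the write and thus is not for general purpose
 * writes.
 */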
ssize_t blkdev_aio_write(struct kiocb *iocb, const struct iovec *iov,
			 unsigned long nr_segs, loff_t pos)
{
	struct file *file = iocb->ki_filp;
	ssize_t ret;

	BUG_ON(iocb->ki_pos != pos);

	ret = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos);
	if (ret > 0 || ret == -EIOCBQUEUED) {
		ssize_t err;

		err = generic_write_sync(file, pos, ret);
		if (err < 0 && ret > 0)
			ret = err;
	}
	return ret;
}
EXPORT_SYMBOL_GPL(blkdev_aio_write);
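
/*
 * Try to release a page associated with block device when the system
 * is under memory pressure.
 */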
static int blkdev_releasepage(struct page *page, gfp_t wait)
{
	struct super_block *super = BDEV_I(page->mapping->host)->bdev.bd_super;

	if (super && super->s_op->bdev_try_to_free_page)
		return super->s_op->bdev_try_to_free_page(super, page, wait);

	return try_to_free_buffers(page);
}

static const struct address_space_operations def_blk_aops = {
	.readpage	= blkdev_readpage,
	.writepage	= blkdev_writepage,
	.write_begin	= blkdev_write_begin,
	.write_end	= blkdev_write_end,
	.writepages	= generic_writepages,
	.releasepage	= blkdev_releasepage,
	.direct_IO	= blkdev_direct_IO,
};

const struct file_operations def_blk_fops = {
	.open		= blkdev_open,
	.release	= blkdev_close,
	.llseek		= block_llseek,
	.read		= do_sync_read,
	.write		= do_sync_write,
	.aio_read	= generic_file_aio_read,
	.aio_write	= blkdev_aio_write,
	.mmap		= generic_file_mmap,
	.fsync		= blkdev_fsync,
	.unlocked_ioctl	= block_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl	= compat_blkdev_ioctl,
#endif
	.splice_read	= generic_file_splice_read,
	.splice_write	= generic_file_splice_write,
};

int ioctl_by_bdev(struct block_device *bdev, unsigned cmd, unsigned long arg)
{
	int res;
	mm_segment_t old_fs = get_fs();
	set_fs(KERNEL_DS);
	res = blkdev_ioctl(bdev, 0, cmd, arg);
	set_fs(old_fs);
	return res;
}

EXPORT_SYMBOL(ioctl_by_bdev);
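
/**
 * lookup_bdev  - lookup a struct block_device by name
 * @pathname:	special file representing the block device
 *
 * Get a reference to the blockdevice at @pathname in the current
 * namespace if possible and return it.  Return ERR_PTR(error)
 * otherwise.
 */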
struct block_device *lookup_bdev(const char *pathname)
{
	struct block_device *bdev;
	struct inode *inode;
	struct path path;
	int error;

	if (!pathname || !*pathname)
		return ERR_PTR(-EINVAL);

	error = kern_path(pathname, LOOKUP_FOLLOW, &path);
	if (error)
		return ERR_PTR(error);

	inode = path.dentry->d_inode;
	error = -ENOTBLK;
	if (!S_ISBLK(inode->i_mode))
		goto fail;
	error = -EACCES;
	if (path.mnt->mnt_flags & MNT_NODEV)
		goto fail;
	error = -ENOMEM;
	bdev = bd_acquire(inode);
	if (!bdev)
		goto fail;
out:
	path_put(&path);
	return bdev;
fail:
	bdev = ERR_PTR(error);
	goto out;
}
EXPORT_SYMBOL(lookup_bdev);

int __invalidate_device(struct block_device *bdev, bool kill_dirty)
{
	struct super_block *sb = get_super(bdev);
	int res = 0;

	if (sb) {
		/*
		 * no need to lock the super, get_super holds the
		 * read mutex so the filesystem cannot go away
		 * under us (->put_super runs with the write lock
		 * hold).
		 */
		shrink_dcache_sb(sb);
		res = invalidate_inodes(sb, kill_dirty);
		drop_super(sb);
	}
	invalidate_bdev(bdev);
	return res;
}
EXPORT_SYMBOL(__invalidate_device);