/*
 *  linux/fs/block_dev.c
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 *  Copyright (C) 2001  Andries Brouwer
 */

#include <linux/init.h>
#include <linux/mm.h>
#include <linux/fcntl.h>
#include <linux/slab.h>
#include <linux/kmod.h>
#include <linux/major.h>
#include <linux/device_cgroup.h>
#include <linux/highmem.h>
#include <linux/blkdev.h>
#include <linux/module.h>
#include <linux/blkpg.h>
#include <linux/magic.h>
#include <linux/buffer_head.h>
#include <linux/swap.h>
#include <linux/pagevec.h>
#include <linux/writeback.h>
#include <linux/mpage.h>
#include <linux/mount.h>
#include <linux/uio.h>
#include <linux/namei.h>
#include <linux/log2.h>
#include <linux/cleancache.h>
#include <linux/aio.h>
#include <asm/uaccess.h>
#include "internal.h"

struct bdev_inode {
	struct block_device bdev;
	struct inode vfs_inode;
};

static const struct address_space_operations def_blk_aops;

static inline struct bdev_inode *BDEV_I(struct inode *inode)
{
	return container_of(inode, struct bdev_inode, vfs_inode);
}

inline struct block_device *I_BDEV(struct inode *inode)
{
	return &BDEV_I(inode)->bdev;
}
EXPORT_SYMBOL(I_BDEV);

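/*
 * Move the inode's mapping to a new backing_dev_info.  If the inode is
 * dirty, relink it onto the destination's dirty list so writeback keeps
 * seeing it, and kick the destination's flusher thread if it was idle.
 */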
static void bdev_inode_switch_bdi(struct inode *inode,
			struct backing_dev_info *dst)
{
	struct backing_dev_info *old = inode->i_data.backing_dev_info;
	bool wakeup_bdi = false;

	if (unlikely(dst == old))		/* deadlock avoidance */
		return;
	bdi_lock_two(&old->wb, &dst->wb);
	spin_lock(&inode->i_lock);
	inode->i_data.backing_dev_info = dst;
	if (inode->i_state & I_DIRTY) {
		if (bdi_cap_writeback_dirty(dst) && !wb_has_dirty_io(&dst->wb))
			wakeup_bdi = true;
		list_move(&inode->i_wb_list, &dst->wb.b_dirty);
	}
	spin_unlock(&inode->i_lock);
	spin_unlock(&old->wb.list_lock);
	spin_unlock(&dst->wb.list_lock);

	if (wakeup_bdi)
		bdi_wakeup_thread_delayed(dst);
}

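/*
 * Kill _all_ buffers and pagecache, dirty or not..
 */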
void kill_bdev(struct block_device *bdev)
{
	struct address_space *mapping = bdev->bd_inode->i_mapping;

	if (mapping->nrpages == 0)
		return;

	invalidate_bh_lrus();
	truncate_inode_pages(mapping, 0);
}
EXPORT_SYMBOL(kill_bdev);

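/* Invalidate clean unused buffers and pagecache. */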
void invalidate_bdev(struct block_device *bdev)
{
	struct address_space *mapping = bdev->bd_inode->i_mapping;

	if (mapping->nrpages == 0)
		return;

	invalidate_bh_lrus();
	lru_add_drain_all();	/* make sure all lru add caches are flushed */
	invalidate_mapping_pages(mapping, 0, -1);
	/*
	 * 99% of the time, we don't need to flush the cleancache on the bdev.
	 * But, for the strange corners, let's be cautious.
	 */
	cleancache_invalidate_inode(mapping);
}
EXPORT_SYMBOL(invalidate_bdev);
111
112int set_blocksize(struct block_device *bdev, int size)
113{
114
115 if (size > PAGE_SIZE || size < 512 || !is_power_of_2(size))
116 return -EINVAL;
117
118
119 if (size < bdev_logical_block_size(bdev))
120 return -EINVAL;
121
122
123 if (bdev->bd_block_size != size) {
124 sync_blockdev(bdev);
125 bdev->bd_block_size = size;
126 bdev->bd_inode->i_blkbits = blksize_bits(size);
127 kill_bdev(bdev);
128 }
129 return 0;
130}
131
132EXPORT_SYMBOL(set_blocksize);
133
int sb_set_blocksize(struct super_block *sb, int size)
{
	if (set_blocksize(sb->s_bdev, size))
		return 0;
	/* If we get here, we know size is power of two
	 * and its value is between 512 and PAGE_SIZE */
	sb->s_blocksize = size;
	sb->s_blocksize_bits = blksize_bits(size);
	return sb->s_blocksize;
}

EXPORT_SYMBOL(sb_set_blocksize);

int sb_min_blocksize(struct super_block *sb, int size)
{
	int minsize = bdev_logical_block_size(sb->s_bdev);
	if (size < minsize)
		size = minsize;
	return sb_set_blocksize(sb, size);
}

EXPORT_SYMBOL(sb_min_blocksize);

static int
blkdev_get_block(struct inode *inode, sector_t iblock,
		struct buffer_head *bh, int create)
{
	bh->b_bdev = I_BDEV(inode);
	bh->b_blocknr = iblock;
	set_buffer_mapped(bh);
	return 0;
}

static ssize_t
blkdev_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
			loff_t offset, unsigned long nr_segs)
{
	struct file *file = iocb->ki_filp;
	struct inode *inode = file->f_mapping->host;

	return __blockdev_direct_IO(rw, iocb, inode, I_BDEV(inode), iov, offset,
				    nr_segs, blkdev_get_block, NULL, NULL, 0);
}

int __sync_blockdev(struct block_device *bdev, int wait)
{
	if (!bdev)
		return 0;
	if (!wait)
		return filemap_flush(bdev->bd_inode->i_mapping);
	return filemap_write_and_wait(bdev->bd_inode->i_mapping);
}

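/*
 * Write out and wait upon all the dirty data associated with a block
 * device via its mapping.  Does not take the superblock lock.
 */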
int sync_blockdev(struct block_device *bdev)
{
	return __sync_blockdev(bdev, 1);
}
EXPORT_SYMBOL(sync_blockdev);

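/*
 * Write out and wait upon all dirty data associated with this
 * device.  Filesystem data as well as the underlying block
 * device.  Takes the superblock lock.
 */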
int fsync_bdev(struct block_device *bdev)
{
	struct super_block *sb = get_super(bdev);
	if (sb) {
		int res = sync_filesystem(sb);
		drop_super(sb);
		return res;
	}
	return sync_blockdev(bdev);
}
EXPORT_SYMBOL(fsync_bdev);

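/**
 * freeze_bdev  --  lock a filesystem and force it into a consistent state
 * @bdev:	blockdevice to lock
 *
 * If a superblock is found on this device, we take the s_umount semaphore
 * on it to make sure nobody unmounts until the snapshot creation is done.
 * The reference counter (bd_fsfreeze_count) guarantees that only the last
 * unfreeze process can actually unfreeze the frozen filesystem when
 * multiple freeze requests arrive simultaneously.  It counts up in
 * freeze_bdev() and down in thaw_bdev(); when it reaches 0, thaw_bdev()
 * actually unfreezes.
 */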
struct super_block *freeze_bdev(struct block_device *bdev)
{
	struct super_block *sb;
	int error = 0;

	mutex_lock(&bdev->bd_fsfreeze_mutex);
	if (++bdev->bd_fsfreeze_count > 1) {
		/*
		 * Already frozen: we don't even need to grab a new
		 * reference - the first call to freeze_bdev grabbed an
		 * active reference which is only released by the final
		 * thaw_bdev().  If the device is mounted, sb is non-NULL.
		 */
		sb = get_super(bdev);
		drop_super(sb);
		mutex_unlock(&bdev->bd_fsfreeze_mutex);
		return sb;
	}

	sb = get_active_super(bdev);
	if (!sb)
		goto out;
	error = freeze_super(sb);
	if (error) {
		deactivate_super(sb);
		bdev->bd_fsfreeze_count--;
		mutex_unlock(&bdev->bd_fsfreeze_mutex);
		return ERR_PTR(error);
	}
	deactivate_super(sb);
 out:
	sync_blockdev(bdev);
	mutex_unlock(&bdev->bd_fsfreeze_mutex);
	return sb;
}
EXPORT_SYMBOL(freeze_bdev);

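/**
 * thaw_bdev  -- unlock filesystem
 * @bdev:	blockdevice to unlock
 * @sb:		associated superblock
 *
 * Unlocks the filesystem and marks it writeable again after freeze_bdev().
 */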
int thaw_bdev(struct block_device *bdev, struct super_block *sb)
{
	int error = -EINVAL;

	mutex_lock(&bdev->bd_fsfreeze_mutex);
	if (!bdev->bd_fsfreeze_count)
		goto out;

	error = 0;
	if (--bdev->bd_fsfreeze_count > 0)
		goto out;

	if (!sb)
		goto out;

	error = thaw_super(sb);
	if (error) {
		bdev->bd_fsfreeze_count++;
		mutex_unlock(&bdev->bd_fsfreeze_mutex);
		return error;
	}
out:
	mutex_unlock(&bdev->bd_fsfreeze_mutex);
	return 0;
}
EXPORT_SYMBOL(thaw_bdev);

static int blkdev_writepage(struct page *page, struct writeback_control *wbc)
{
	return block_write_full_page(page, blkdev_get_block, wbc);
}

static int blkdev_readpage(struct file *file, struct page *page)
{
	return block_read_full_page(page, blkdev_get_block);
}

static int blkdev_write_begin(struct file *file, struct address_space *mapping,
			loff_t pos, unsigned len, unsigned flags,
			struct page **pagep, void **fsdata)
{
	return block_write_begin(mapping, pos, len, flags, pagep,
				 blkdev_get_block);
}

static int blkdev_write_end(struct file *file, struct address_space *mapping,
			loff_t pos, unsigned len, unsigned copied,
			struct page *page, void *fsdata)
{
	int ret;
	ret = block_write_end(file, mapping, pos, len, copied, page, fsdata);

	unlock_page(page);
	page_cache_release(page);

	return ret;
}

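/*
 * for a block special file file_inode(file)->i_size is zero
 * so we compute the size by hand (just as in block_read/write above)
 */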
static loff_t block_llseek(struct file *file, loff_t offset, int whence)
{
	struct inode *bd_inode = file->f_mapping->host;
	loff_t retval;

	mutex_lock(&bd_inode->i_mutex);
	retval = fixed_size_llseek(file, offset, whence, i_size_read(bd_inode));
	mutex_unlock(&bd_inode->i_mutex);
	return retval;
}

int blkdev_fsync(struct file *filp, loff_t start, loff_t end, int datasync)
{
	struct inode *bd_inode = filp->f_mapping->host;
	struct block_device *bdev = I_BDEV(bd_inode);
	int error;

	error = filemap_write_and_wait_range(filp->f_mapping, start, end);
	if (error)
		return error;

	/*
	 * There is no need to serialise calls to blkdev_issue_flush with
	 * i_mutex and doing so causes performance issues with concurrent
	 * O_SYNC writers to a block device.
	 */
	error = blkdev_issue_flush(bdev, GFP_KERNEL, NULL);
	if (error == -EOPNOTSUPP)
		error = 0;

	return error;
}
EXPORT_SYMBOL(blkdev_fsync);

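/*
 * pseudo-fs
 */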
static __cacheline_aligned_in_smp DEFINE_SPINLOCK(bdev_lock);
static struct kmem_cache *bdev_cachep __read_mostly;

static struct inode *bdev_alloc_inode(struct super_block *sb)
{
	struct bdev_inode *ei = kmem_cache_alloc(bdev_cachep, GFP_KERNEL);
	if (!ei)
		return NULL;
	return &ei->vfs_inode;
}

static void bdev_i_callback(struct rcu_head *head)
{
	struct inode *inode = container_of(head, struct inode, i_rcu);
	struct bdev_inode *bdi = BDEV_I(inode);

	kmem_cache_free(bdev_cachep, bdi);
}

static void bdev_destroy_inode(struct inode *inode)
{
	call_rcu(&inode->i_rcu, bdev_i_callback);
}

static void init_once(void *foo)
{
	struct bdev_inode *ei = (struct bdev_inode *) foo;
	struct block_device *bdev = &ei->bdev;

	memset(bdev, 0, sizeof(*bdev));
	mutex_init(&bdev->bd_mutex);
	INIT_LIST_HEAD(&bdev->bd_inodes);
	INIT_LIST_HEAD(&bdev->bd_list);
#ifdef CONFIG_SYSFS
	INIT_LIST_HEAD(&bdev->bd_holder_disks);
#endif
	inode_init_once(&ei->vfs_inode);
	/* Initialize mutex for freeze. */
	mutex_init(&bdev->bd_fsfreeze_mutex);
}

static inline void __bd_forget(struct inode *inode)
{
	list_del_init(&inode->i_devices);
	inode->i_bdev = NULL;
	inode->i_mapping = &inode->i_data;
}

static void bdev_evict_inode(struct inode *inode)
{
	struct block_device *bdev = &BDEV_I(inode)->bdev;
	struct list_head *p;
	truncate_inode_pages(&inode->i_data, 0);
	invalidate_inode_buffers(inode);
	clear_inode(inode);
	spin_lock(&bdev_lock);
	while ((p = bdev->bd_inodes.next) != &bdev->bd_inodes) {
		__bd_forget(list_entry(p, struct inode, i_devices));
	}
	list_del_init(&bdev->bd_list);
	spin_unlock(&bdev_lock);
}

static const struct super_operations bdev_sops = {
	.statfs = simple_statfs,
	.alloc_inode = bdev_alloc_inode,
	.destroy_inode = bdev_destroy_inode,
	.drop_inode = generic_delete_inode,
	.evict_inode = bdev_evict_inode,
};

static struct dentry *bd_mount(struct file_system_type *fs_type,
	int flags, const char *dev_name, void *data)
{
	return mount_pseudo(fs_type, "bdev:", &bdev_sops, NULL, BDEVFS_MAGIC);
}

static struct file_system_type bd_type = {
	.name		= "bdev",
	.mount		= bd_mount,
	.kill_sb	= kill_anon_super,
};

static struct super_block *blockdev_superblock __read_mostly;

void __init bdev_cache_init(void)
{
	int err;
	static struct vfsmount *bd_mnt;

	bdev_cachep = kmem_cache_create("bdev_cache", sizeof(struct bdev_inode),
			0, (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|
				SLAB_MEM_SPREAD|SLAB_PANIC),
			init_once);
	err = register_filesystem(&bd_type);
	if (err)
		panic("Cannot register bdev pseudo-fs");
	bd_mnt = kern_mount(&bd_type);
	if (IS_ERR(bd_mnt))
		panic("Cannot create bdev pseudo-fs");
	blockdev_superblock = bd_mnt->mnt_sb;	/* For writeback */
}

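/*
 * Most likely _very_ bad one - but then it's hardly critical for small
 * /dev and can be fixed when somebody will need really large one.
 * Keep in mind that it will be fed through icache hash function too.
 */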
static inline unsigned long hash(dev_t dev)
{
	return MAJOR(dev)+MINOR(dev);
}

static int bdev_test(struct inode *inode, void *data)
{
	return BDEV_I(inode)->bdev.bd_dev == *(dev_t *)data;
}

static int bdev_set(struct inode *inode, void *data)
{
	BDEV_I(inode)->bdev.bd_dev = *(dev_t *)data;
	return 0;
}

static LIST_HEAD(all_bdevs);

struct block_device *bdget(dev_t dev)
{
	struct block_device *bdev;
	struct inode *inode;

	inode = iget5_locked(blockdev_superblock, hash(dev),
			bdev_test, bdev_set, &dev);

	if (!inode)
		return NULL;

	bdev = &BDEV_I(inode)->bdev;

	if (inode->i_state & I_NEW) {
		bdev->bd_contains = NULL;
		bdev->bd_super = NULL;
		bdev->bd_inode = inode;
		bdev->bd_block_size = (1 << inode->i_blkbits);
		bdev->bd_part_count = 0;
		bdev->bd_invalidated = 0;
		inode->i_mode = S_IFBLK;
		inode->i_rdev = dev;
		inode->i_bdev = bdev;
		inode->i_data.a_ops = &def_blk_aops;
		mapping_set_gfp_mask(&inode->i_data, GFP_USER);
		inode->i_data.backing_dev_info = &default_backing_dev_info;
		spin_lock(&bdev_lock);
		list_add(&bdev->bd_list, &all_bdevs);
		spin_unlock(&bdev_lock);
		unlock_new_inode(inode);
	}
	return bdev;
}

EXPORT_SYMBOL(bdget);

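/**
 * bdgrab -- Grab a reference to an already referenced block device
 * @bdev:	Block device to grab a reference to.
 */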
struct block_device *bdgrab(struct block_device *bdev)
{
	ihold(bdev->bd_inode);
	return bdev;
}
EXPORT_SYMBOL(bdgrab);

long nr_blockdev_pages(void)
{
	struct block_device *bdev;
	long ret = 0;
	spin_lock(&bdev_lock);
	list_for_each_entry(bdev, &all_bdevs, bd_list) {
		ret += bdev->bd_inode->i_mapping->nrpages;
	}
	spin_unlock(&bdev_lock);
	return ret;
}

void bdput(struct block_device *bdev)
{
	iput(bdev->bd_inode);
}

EXPORT_SYMBOL(bdput);

static struct block_device *bd_acquire(struct inode *inode)
{
	struct block_device *bdev;

	spin_lock(&bdev_lock);
	bdev = inode->i_bdev;
	if (bdev) {
		ihold(bdev->bd_inode);
		spin_unlock(&bdev_lock);
		return bdev;
	}
	spin_unlock(&bdev_lock);

	bdev = bdget(inode->i_rdev);
	if (bdev) {
		spin_lock(&bdev_lock);
		if (!inode->i_bdev) {
			/*
			 * We take an additional reference to bd_inode,
			 * and it's released in clear_inode() of inode.
			 * So, we can access it via ->i_mapping always
			 * without igrab().
			 */
			ihold(bdev->bd_inode);
			inode->i_bdev = bdev;
			inode->i_mapping = bdev->bd_inode->i_mapping;
			list_add(&inode->i_devices, &bdev->bd_inodes);
		}
		spin_unlock(&bdev_lock);
	}
	return bdev;
}

int sb_is_blkdev_sb(struct super_block *sb)
{
	return sb == blockdev_superblock;
}

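/* Call when you free inode */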
void bd_forget(struct inode *inode)
{
	struct block_device *bdev = NULL;

	spin_lock(&bdev_lock);
	if (!sb_is_blkdev_sb(inode->i_sb))
		bdev = inode->i_bdev;
	__bd_forget(inode);
	spin_unlock(&bdev_lock);

	if (bdev)
		iput(bdev->bd_inode);
}

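/**
 * bd_may_claim - test whether a block device can be claimed
 * @bdev: block device of interest
 * @whole: whole block device containing @bdev, may equal @bdev
 * @holder: holder trying to claim @bdev
 *
 * Test whether @bdev can be claimed by @holder.
 *
 * CONTEXT:
 * spin_lock(&bdev_lock).
 *
 * RETURNS:
 * %true if @bdev can be claimed, %false otherwise.
 */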
static bool bd_may_claim(struct block_device *bdev, struct block_device *whole,
			 void *holder)
{
	if (bdev->bd_holder == holder)
		return true;	 /* already a holder */
	else if (bdev->bd_holder != NULL)
		return false;	 /* held by someone else */
	else if (bdev->bd_contains == bdev)
		return true;	 /* is a whole device which isn't held */

	else if (whole->bd_holder == bd_may_claim)
		return true;	 /* is a partition of a device that is being partitioned */
	else if (whole->bd_holder != NULL)
		return false;	 /* is a partition of a held device */
	else
		return true;	 /* is a partition of an un-held device */
}

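/**
 * bd_prepare_to_claim - prepare to claim a block device
 * @bdev: block device of interest
 * @whole: the whole device containing @bdev, may equal @bdev
 * @holder: holder trying to claim @bdev
 *
 * Prepare to claim @bdev.  This function fails if @bdev is already
 * claimed by another holder and waits if another claiming is in
 * progress.  This function doesn't actually claim.
 *
 * CONTEXT:
 * spin_lock(&bdev_lock).  Might release bdev_lock, sleep and regrab
 * it multiple times.
 *
 * RETURNS:
 * 0 if @bdev can be claimed, -EBUSY otherwise.
 */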
static int bd_prepare_to_claim(struct block_device *bdev,
			       struct block_device *whole, void *holder)
{
retry:
	/* if someone else claimed, fail */
	if (!bd_may_claim(bdev, whole, holder))
		return -EBUSY;

	/* if claiming is already in progress, wait for it to finish */
	if (whole->bd_claiming) {
		wait_queue_head_t *wq = bit_waitqueue(&whole->bd_claiming, 0);
		DEFINE_WAIT(wait);

		prepare_to_wait(wq, &wait, TASK_UNINTERRUPTIBLE);
		spin_unlock(&bdev_lock);
		schedule();
		finish_wait(wq, &wait);
		spin_lock(&bdev_lock);
		goto retry;
	}

	/* yay, all mine */
	return 0;
}

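/**
 * bd_start_claiming - start claiming a block device
 * @bdev: block device of interest
 * @holder: holder trying to claim @bdev
 *
 * @bdev is about to be opened exclusively.  Check @bdev can be opened
 * exclusively and mark that an exclusive open is in progress.  On
 * success, the whole device containing @bdev is returned with its
 * bd_claiming set to @holder; the caller finishes (or aborts) the
 * claim under bdev_lock in blkdev_get().
 *
 * CONTEXT:
 * Might sleep.
 *
 * RETURNS:
 * Pointer to the whole device with bd_claiming set on success,
 * ERR_PTR() value on failure.
 */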
static struct block_device *bd_start_claiming(struct block_device *bdev,
					      void *holder)
{
	struct gendisk *disk;
	struct block_device *whole;
	int partno, err;

	might_sleep();

	/*
	 * @bdev might not have been initialized properly yet, look up
	 * and grab the outer block device the hard way.
	 */
	disk = get_gendisk(bdev->bd_dev, &partno);
	if (!disk)
		return ERR_PTR(-ENXIO);

	/*
	 * Normally @bdev is the whole device when partno is 0, but some
	 * drivers (floppy) use multiple bdev's for the same physical
	 * device, so resolve the whole device explicitly for partitions
	 * and just grab an extra reference otherwise.
	 */
	if (partno)
		whole = bdget_disk(disk, 0);
	else
		whole = bdgrab(bdev);

	module_put(disk->fops->owner);
	put_disk(disk);
	if (!whole)
		return ERR_PTR(-ENOMEM);

	/* prepare to claim, if successful, mark claiming in progress */
	spin_lock(&bdev_lock);

	err = bd_prepare_to_claim(bdev, whole, holder);
	if (err == 0) {
		whole->bd_claiming = holder;
		spin_unlock(&bdev_lock);
		return whole;
	} else {
		spin_unlock(&bdev_lock);
		bdput(whole);
		return ERR_PTR(err);
	}
}

#ifdef CONFIG_SYSFS
struct bd_holder_disk {
	struct list_head	list;
	struct gendisk		*disk;
	int			refcnt;
};

static struct bd_holder_disk *bd_find_holder_disk(struct block_device *bdev,
						  struct gendisk *disk)
{
	struct bd_holder_disk *holder;

	list_for_each_entry(holder, &bdev->bd_holder_disks, list)
		if (holder->disk == disk)
			return holder;
	return NULL;
}

static int add_symlink(struct kobject *from, struct kobject *to)
{
	return sysfs_create_link(from, to, kobject_name(to));
}

static void del_symlink(struct kobject *from, struct kobject *to)
{
	sysfs_remove_link(from, kobject_name(to));
}

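/**
 * bd_link_disk_holder - create symlinks between holding disk and slave bdev
 * @bdev: the claimed slave bdev
 * @disk: the holding disk
 *
 * DON'T USE THIS UNLESS YOU'RE ALREADY USING IT.
 *
 * This functions creates the following sysfs symlinks.
 *
 * - from "slaves" directory of the holder @disk to the claimed @bdev
 * - from "holders" directory of the @bdev to the holder @disk
 *
 * For example, if /dev/dm-0 maps to /dev/sda and disk for dm-0 is
 * passed to bd_link_disk_holder(), then:
 *
 *   /sys/block/dm-0/slaves/sda --> /sys/block/sda
 *   /sys/block/sda/holders/dm-0 --> /sys/block/dm-0
 *
 * The caller must have claimed @bdev before calling this function and
 * ensure that both @bdev and @disk are valid during the creation and
 * lifetime of these symlinks.
 *
 * CONTEXT:
 * Might sleep.
 *
 * RETURNS:
 * 0 on success, -errno on failure.
 */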
int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk)
{
	struct bd_holder_disk *holder;
	int ret = 0;

	mutex_lock(&bdev->bd_mutex);

	WARN_ON_ONCE(!bdev->bd_holder);

	/* FIXME: remove the following once add_disk() handles errors */
	if (WARN_ON(!disk->slave_dir || !bdev->bd_part->holder_dir))
		goto out_unlock;

	holder = bd_find_holder_disk(bdev, disk);
	if (holder) {
		holder->refcnt++;
		goto out_unlock;
	}

	holder = kzalloc(sizeof(*holder), GFP_KERNEL);
	if (!holder) {
		ret = -ENOMEM;
		goto out_unlock;
	}

	INIT_LIST_HEAD(&holder->list);
	holder->disk = disk;
	holder->refcnt = 1;

	ret = add_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj);
	if (ret)
		goto out_free;

	ret = add_symlink(bdev->bd_part->holder_dir, &disk_to_dev(disk)->kobj);
	if (ret)
		goto out_del;
	/*
	 * bdev could be deleted beneath us which would implicitly destroy
	 * the holder directory.  Hold on to it.
	 */
	kobject_get(bdev->bd_part->holder_dir);

	list_add(&holder->list, &bdev->bd_holder_disks);
	goto out_unlock;

out_del:
	del_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj);
out_free:
	kfree(holder);
out_unlock:
	mutex_unlock(&bdev->bd_mutex);
	return ret;
}
EXPORT_SYMBOL_GPL(bd_link_disk_holder);

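/**
 * bd_unlink_disk_holder - destroy symlinks created by bd_link_disk_holder()
 * @bdev: the claimed slave bdev
 * @disk: the holding disk
 *
 * DON'T USE THIS UNLESS YOU'RE ALREADY USING IT.
 *
 * CONTEXT:
 * Might sleep.
 */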
void bd_unlink_disk_holder(struct block_device *bdev, struct gendisk *disk)
{
	struct bd_holder_disk *holder;

	mutex_lock(&bdev->bd_mutex);

	holder = bd_find_holder_disk(bdev, disk);

	if (!WARN_ON_ONCE(holder == NULL) && !--holder->refcnt) {
		del_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj);
		del_symlink(bdev->bd_part->holder_dir,
			    &disk_to_dev(disk)->kobj);
		kobject_put(bdev->bd_part->holder_dir);
		list_del_init(&holder->list);
		kfree(holder);
	}

	mutex_unlock(&bdev->bd_mutex);
}
EXPORT_SYMBOL_GPL(bd_unlink_disk_holder);
#endif

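/**
 * flush_disk - invalidates all buffer-cache entries on a disk
 * @bdev:	struct block device to be flushed
 * @kill_dirty: flag to guide handling of dirty inodes
 *
 * Invalidates all buffer-cache entries on a disk. It should be called
 * when a disk has been changed -- either by a media change or online
 * resize.
 */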
static void flush_disk(struct block_device *bdev, bool kill_dirty)
{
	if (__invalidate_device(bdev, kill_dirty)) {
		char name[BDEVNAME_SIZE] = "";

		if (bdev->bd_disk)
			disk_name(bdev->bd_disk, 0, name);
		printk(KERN_WARNING "VFS: busy inodes on changed media or "
		       "resized disk %s\n", name);
	}

	if (!bdev->bd_disk)
		return;
	if (disk_part_scan_enabled(bdev->bd_disk))
		bdev->bd_invalidated = 1;
}

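/**
 * check_disk_size_change - checks for disk size change and adjusts bdev size.
 * @disk: struct gendisk to check
 * @bdev: struct bdev to adjust.
 *
 * This routine checks to see if the bdev size does not match the disk size
 * and adjusts it if it differs.
 */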
void check_disk_size_change(struct gendisk *disk, struct block_device *bdev)
{
	loff_t disk_size, bdev_size;

	disk_size = (loff_t)get_capacity(disk) << 9;
	bdev_size = i_size_read(bdev->bd_inode);
	if (disk_size != bdev_size) {
		char name[BDEVNAME_SIZE];

		disk_name(disk, 0, name);
		printk(KERN_INFO
		       "%s: detected capacity change from %lld to %lld\n",
		       name, bdev_size, disk_size);
		i_size_write(bdev->bd_inode, disk_size);
		flush_disk(bdev, false);
	}
}
EXPORT_SYMBOL(check_disk_size_change);

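/**
 * revalidate_disk - wrapper for lower-level driver's revalidate_disk call-back
 * @disk: struct gendisk to be revalidated
 *
 * This routine is a wrapper for lower-level driver's revalidate_disk
 * call-backs.  It is used to do common pre and post operations needed
 * for all revalidate_disk operations.
 */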
int revalidate_disk(struct gendisk *disk)
{
	struct block_device *bdev;
	int ret = 0;

	if (disk->fops->revalidate_disk)
		ret = disk->fops->revalidate_disk(disk);

	bdev = bdget_disk(disk, 0);
	if (!bdev)
		return ret;

	mutex_lock(&bdev->bd_mutex);
	check_disk_size_change(disk, bdev);
	bdev->bd_invalidated = 0;
	mutex_unlock(&bdev->bd_mutex);
	bdput(bdev);
	return ret;
}
EXPORT_SYMBOL(revalidate_disk);

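/*
 * This routine checks whether a removable media has been changed,
 * and invalidates all buffer-cache-entries in that case. This
 * is a relatively slow routine, so we have to try to minimize using
 * it. Thus it is called only upon a 'mount' or 'open'. This
 * is the best way of combining speed and utility, I think.
 * People changing diskettes in the middle of an operation deserve
 * to lose :-)
 */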
int check_disk_change(struct block_device *bdev)
{
	struct gendisk *disk = bdev->bd_disk;
	const struct block_device_operations *bdops = disk->fops;
	unsigned int events;

	events = disk_clear_events(disk, DISK_EVENT_MEDIA_CHANGE |
				   DISK_EVENT_EJECT_REQUEST);
	if (!(events & DISK_EVENT_MEDIA_CHANGE))
		return 0;

	flush_disk(bdev, true);
	if (bdops->revalidate_disk)
		bdops->revalidate_disk(bdev->bd_disk);
	return 1;
}

EXPORT_SYMBOL(check_disk_change);

void bd_set_size(struct block_device *bdev, loff_t size)
{
	unsigned bsize = bdev_logical_block_size(bdev);

	mutex_lock(&bdev->bd_inode->i_mutex);
	i_size_write(bdev->bd_inode, size);
	mutex_unlock(&bdev->bd_inode->i_mutex);
	while (bsize < PAGE_CACHE_SIZE) {
		if (size & bsize)
			break;
		bsize <<= 1;
	}
	bdev->bd_block_size = bsize;
	bdev->bd_inode->i_blkbits = blksize_bits(bsize);
}
EXPORT_SYMBOL(bd_set_size);

static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part);

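/*
 * bd_mutex locking:
 *
 *  mutex_lock(part->bd_mutex)
 *    mutex_lock_nested(whole->bd_mutex, 1)
 */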
static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
{
	struct gendisk *disk;
	struct module *owner;
	int ret;
	int partno;
	int perm = 0;

	if (mode & FMODE_READ)
		perm |= MAY_READ;
	if (mode & FMODE_WRITE)
		perm |= MAY_WRITE;
	/*
	 * hooks: /n/, see "layering violations".
	 */
	if (!for_part) {
		ret = devcgroup_inode_permission(bdev->bd_inode, perm);
		if (ret != 0) {
			bdput(bdev);
			return ret;
		}
	}

 restart:

	ret = -ENXIO;
	disk = get_gendisk(bdev->bd_dev, &partno);
	if (!disk)
		goto out;
	owner = disk->fops->owner;

	disk_block_events(disk);
	mutex_lock_nested(&bdev->bd_mutex, for_part);
	if (!bdev->bd_openers) {
		bdev->bd_disk = disk;
		bdev->bd_queue = disk->queue;
		bdev->bd_contains = bdev;
		if (!partno) {
			struct backing_dev_info *bdi;

			ret = -ENXIO;
			bdev->bd_part = disk_get_part(disk, partno);
			if (!bdev->bd_part)
				goto out_clear;

			ret = 0;
			if (disk->fops->open) {
				ret = disk->fops->open(bdev, mode);
				if (ret == -ERESTARTSYS) {
					/* Lost a race with 'disk' being
					 * deleted, try again.
					 * See md.c
					 */
					disk_put_part(bdev->bd_part);
					bdev->bd_part = NULL;
					bdev->bd_disk = NULL;
					bdev->bd_queue = NULL;
					mutex_unlock(&bdev->bd_mutex);
					disk_unblock_events(disk);
					put_disk(disk);
					module_put(owner);
					goto restart;
				}
			}

			if (!ret) {
				bd_set_size(bdev, (loff_t)get_capacity(disk) << 9);
				bdi = blk_get_backing_dev_info(bdev);
				if (bdi == NULL)
					bdi = &default_backing_dev_info;
				bdev_inode_switch_bdi(bdev->bd_inode, bdi);
			}

			/*
			 * If the device is invalidated, rescan partition
			 * if open succeeded or failed with -ENOMEDIUM.
			 * The latter is necessary to prevent ghost
			 * partitions on a removed medium.
			 */
			if (bdev->bd_invalidated) {
				if (!ret)
					rescan_partitions(disk, bdev);
				else if (ret == -ENOMEDIUM)
					invalidate_partitions(disk, bdev);
			}
			if (ret)
				goto out_clear;
		} else {
			struct block_device *whole;
			whole = bdget_disk(disk, 0);
			ret = -ENOMEM;
			if (!whole)
				goto out_clear;
			BUG_ON(for_part);
			ret = __blkdev_get(whole, mode, 1);
			if (ret)
				goto out_clear;
			bdev->bd_contains = whole;
			bdev_inode_switch_bdi(bdev->bd_inode,
				whole->bd_inode->i_data.backing_dev_info);
			bdev->bd_part = disk_get_part(disk, partno);
			if (!(disk->flags & GENHD_FL_UP) ||
			    !bdev->bd_part || !bdev->bd_part->nr_sects) {
				ret = -ENXIO;
				goto out_clear;
			}
			bd_set_size(bdev, (loff_t)bdev->bd_part->nr_sects << 9);
		}
	} else {
		if (bdev->bd_contains == bdev) {
			ret = 0;
			if (bdev->bd_disk->fops->open)
				ret = bdev->bd_disk->fops->open(bdev, mode);
			/* the same as first opener case, read comment there */
			if (bdev->bd_invalidated) {
				if (!ret)
					rescan_partitions(bdev->bd_disk, bdev);
				else if (ret == -ENOMEDIUM)
					invalidate_partitions(bdev->bd_disk, bdev);
			}
			if (ret)
				goto out_unlock_bdev;
		}
		/* only one opener holds refs to the module and disk */
		put_disk(disk);
		module_put(owner);
	}
	bdev->bd_openers++;
	if (for_part)
		bdev->bd_part_count++;
	mutex_unlock(&bdev->bd_mutex);
	disk_unblock_events(disk);
	return 0;

 out_clear:
	disk_put_part(bdev->bd_part);
	bdev->bd_disk = NULL;
	bdev->bd_part = NULL;
	bdev->bd_queue = NULL;
	bdev_inode_switch_bdi(bdev->bd_inode, &default_backing_dev_info);
	if (bdev != bdev->bd_contains)
		__blkdev_put(bdev->bd_contains, mode, 1);
	bdev->bd_contains = NULL;
 out_unlock_bdev:
	mutex_unlock(&bdev->bd_mutex);
	disk_unblock_events(disk);
	put_disk(disk);
	module_put(owner);
 out:
	bdput(bdev);

	return ret;
}

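/**
 * blkdev_get - open a block device
 * @bdev: block_device to open
 * @mode: FMODE_* mask
 * @holder: exclusive holder identifier
 *
 * Open @bdev with @mode.  If @mode includes %FMODE_EXCL, @bdev is
 * open with exclusive access.  Specifying %FMODE_EXCL with %NULL
 * @holder is invalid.  Exclusive opens may nest for the same @holder.
 *
 * On success, the reference count of @bdev is unchanged.  On failure,
 * @bdev is put.
 *
 * CONTEXT:
 * Might sleep.
 *
 * RETURNS:
 * 0 on success, -errno on failure.
 */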
int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder)
{
	struct block_device *whole = NULL;
	int res;

	WARN_ON_ONCE((mode & FMODE_EXCL) && !holder);

	if ((mode & FMODE_EXCL) && holder) {
		whole = bd_start_claiming(bdev, holder);
		if (IS_ERR(whole)) {
			bdput(bdev);
			return PTR_ERR(whole);
		}
	}

	res = __blkdev_get(bdev, mode, 0);

	if (whole) {
		struct gendisk *disk = whole->bd_disk;

		/* finish claiming */
		mutex_lock(&bdev->bd_mutex);
		spin_lock(&bdev_lock);

		if (!res) {
			BUG_ON(!bd_may_claim(bdev, whole, holder));
			/*
			 * Note that for a whole device bd_holders
			 * will be incremented twice, and bd_holder
			 * will be set to bd_may_claim before being
			 * set to holder
			 */
			whole->bd_holders++;
			whole->bd_holder = bd_may_claim;
			bdev->bd_holders++;
			bdev->bd_holder = holder;
		}

		/* tell others that we're done */
		BUG_ON(whole->bd_claiming != holder);
		whole->bd_claiming = NULL;
		wake_up_bit(&whole->bd_claiming, 0);

		spin_unlock(&bdev_lock);

		/*
		 * Block event polling for write claims if requested.  Any
		 * write holder makes the write_holder state stick until
		 * all are released.  This is good enough and tracking
		 * individual writeable reference is too fragile given the
		 * way @mode is used in blkdev_get/put().
		 */
		if (!res && (mode & FMODE_WRITE) && !bdev->bd_write_holder &&
		    (disk->flags & GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE)) {
			bdev->bd_write_holder = true;
			disk_block_events(disk);
		}

		mutex_unlock(&bdev->bd_mutex);
		bdput(whole);
	}

	return res;
}
EXPORT_SYMBOL(blkdev_get);

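/**
 * blkdev_get_by_path - open a block device by name
 * @path: path to the block device to open
 * @mode: FMODE_* mask
 * @holder: exclusive holder identifier
 *
 * Open the blockdevice described by the device file at @path.  @mode
 * and @holder are identical to blkdev_get().
 *
 * On success, the returned block_device has reference count of one.
 *
 * CONTEXT:
 * Might sleep.
 *
 * RETURNS:
 * Pointer to block_device on success, ERR_PTR(-errno) on failure.
 */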
struct block_device *blkdev_get_by_path(const char *path, fmode_t mode,
					void *holder)
{
	struct block_device *bdev;
	int err;

	bdev = lookup_bdev(path);
	if (IS_ERR(bdev))
		return bdev;

	err = blkdev_get(bdev, mode, holder);
	if (err)
		return ERR_PTR(err);

	if ((mode & FMODE_WRITE) && bdev_read_only(bdev)) {
		blkdev_put(bdev, mode);
		return ERR_PTR(-EACCES);
	}

	return bdev;
}
EXPORT_SYMBOL(blkdev_get_by_path);

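/**
 * blkdev_get_by_dev - open a block device by device number
 * @dev: device number of block device to open
 * @mode: FMODE_* mask
 * @holder: exclusive holder identifier
 *
 * Open the blockdevice described by device number @dev.  @mode and
 * @holder are identical to blkdev_get().
 *
 * Use it ONLY if you really do not have anything better - i.e. when
 * you are behind a truly sucky interface and all you are given is a
 * device number.  _Never_ to be used for internal purposes.  If you
 * ever need it - reconsider your API.
 *
 * On success, the returned block_device has reference count of one.
 *
 * CONTEXT:
 * Might sleep.
 *
 * RETURNS:
 * Pointer to block_device on success, ERR_PTR(-errno) on failure.
 */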
struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder)
{
	struct block_device *bdev;
	int err;

	bdev = bdget(dev);
	if (!bdev)
		return ERR_PTR(-ENOMEM);

	err = blkdev_get(bdev, mode, holder);
	if (err)
		return ERR_PTR(err);

	return bdev;
}
EXPORT_SYMBOL(blkdev_get_by_dev);

static int blkdev_open(struct inode *inode, struct file *filp)
{
	struct block_device *bdev;

	/*
	 * Preserve backwards compatibility and allow large file access
	 * even if userspace doesn't ask for it explicitly. Some mkfs
	 * binary needs it. We might want to drop this workaround
	 * during an unstable branch.
	 */
	filp->f_flags |= O_LARGEFILE;

	if (filp->f_flags & O_NDELAY)
		filp->f_mode |= FMODE_NDELAY;
	if (filp->f_flags & O_EXCL)
		filp->f_mode |= FMODE_EXCL;
	if ((filp->f_flags & O_ACCMODE) == 3)
		filp->f_mode |= FMODE_WRITE_IOCTL;

	bdev = bd_acquire(inode);
	if (bdev == NULL)
		return -ENOMEM;

	filp->f_mapping = bdev->bd_inode->i_mapping;

	return blkdev_get(bdev, filp->f_mode, filp);
}

static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part)
{
	struct gendisk *disk = bdev->bd_disk;
	struct block_device *victim = NULL;

	mutex_lock_nested(&bdev->bd_mutex, for_part);
	if (for_part)
		bdev->bd_part_count--;

	if (!--bdev->bd_openers) {
		WARN_ON_ONCE(bdev->bd_holders);
		sync_blockdev(bdev);
		kill_bdev(bdev);
		/*
		 * ->release can cause the old bdi to disappear,
		 * so must switch it out first
		 */
		bdev_inode_switch_bdi(bdev->bd_inode,
					&default_backing_dev_info);
	}
	if (bdev->bd_contains == bdev) {
		if (disk->fops->release)
			disk->fops->release(disk, mode);
	}
	if (!bdev->bd_openers) {
		struct module *owner = disk->fops->owner;

		disk_put_part(bdev->bd_part);
		bdev->bd_part = NULL;
		bdev->bd_disk = NULL;
		if (bdev != bdev->bd_contains)
			victim = bdev->bd_contains;
		bdev->bd_contains = NULL;

		put_disk(disk);
		module_put(owner);
	}
	mutex_unlock(&bdev->bd_mutex);
	bdput(bdev);
	if (victim)
		__blkdev_put(victim, mode, 1);
}

void blkdev_put(struct block_device *bdev, fmode_t mode)
{
	mutex_lock(&bdev->bd_mutex);

	if (mode & FMODE_EXCL) {
		bool bdev_free;

		/*
		 * Release a claim on the device.  The holder fields
		 * are protected with bdev_lock.  bd_mutex is to
		 * synchronize disk_holder unlinking.
		 */
		spin_lock(&bdev_lock);

		WARN_ON_ONCE(--bdev->bd_holders < 0);
		WARN_ON_ONCE(--bdev->bd_contains->bd_holders < 0);

		/* bd_contains might point back to self, check in a separate step */
		if ((bdev_free = !bdev->bd_holders))
			bdev->bd_holder = NULL;
		if (!bdev->bd_contains->bd_holders)
			bdev->bd_contains->bd_holder = NULL;

		spin_unlock(&bdev_lock);

		/*
		 * If this was the last claim, remove holder link and
		 * unblock evpoll if it was a write holder.
		 */
		if (bdev_free && bdev->bd_write_holder) {
			disk_unblock_events(bdev->bd_disk);
			bdev->bd_write_holder = false;
		}
	}

	/*
	 * Trigger event checking and tell drivers to flush MEDIA_CHANGE
	 * event.  This is to ensure detection of media removal commanded
	 * from userland - e.g. eject(1).
	 */
	disk_flush_events(bdev->bd_disk, DISK_EVENT_MEDIA_CHANGE);

	mutex_unlock(&bdev->bd_mutex);

	__blkdev_put(bdev, mode, 0);
}
EXPORT_SYMBOL(blkdev_put);

static int blkdev_close(struct inode *inode, struct file *filp)
{
	struct block_device *bdev = I_BDEV(filp->f_mapping->host);
	blkdev_put(bdev, filp->f_mode);
	return 0;
}

static long block_ioctl(struct file *file, unsigned cmd, unsigned long arg)
{
	struct block_device *bdev = I_BDEV(file->f_mapping->host);
	fmode_t mode = file->f_mode;

	/*
	 * O_NDELAY can be altered using fcntl(.., F_SETFL, ..), so we have
	 * to update it before every ioctl.
	 */
	if (file->f_flags & O_NDELAY)
		mode |= FMODE_NDELAY;
	else
		mode &= ~FMODE_NDELAY;

	return blkdev_ioctl(bdev, mode, cmd, arg);
}

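/*
 * Write data to the block device.  Only intended for the block device itself
 * and the raw driver which basically is a fake block device.
 *
 * Does not take i_mutex for the write and thus is not for general purpose
 * use.
 */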
ssize_t blkdev_aio_write(struct kiocb *iocb, const struct iovec *iov,
			 unsigned long nr_segs, loff_t pos)
{
	struct file *file = iocb->ki_filp;
	struct blk_plug plug;
	ssize_t ret;

	BUG_ON(iocb->ki_pos != pos);

	blk_start_plug(&plug);
	ret = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos);
	if (ret > 0) {
		ssize_t err;

		err = generic_write_sync(file, pos, ret);
		if (err < 0 && ret > 0)
			ret = err;
	}
	blk_finish_plug(&plug);
	return ret;
}
EXPORT_SYMBOL_GPL(blkdev_aio_write);

static ssize_t blkdev_aio_read(struct kiocb *iocb, const struct iovec *iov,
			unsigned long nr_segs, loff_t pos)
{
	struct file *file = iocb->ki_filp;
	struct inode *bd_inode = file->f_mapping->host;
	loff_t size = i_size_read(bd_inode);

	if (pos >= size)
		return 0;

	size -= pos;
	if (size < iocb->ki_nbytes)
		nr_segs = iov_shorten((struct iovec *)iov, nr_segs, size);
	return generic_file_aio_read(iocb, iov, nr_segs, pos);
}

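/*
 * Try to release a page associated with block device when the system
 * is under memory pressure.
 */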
static int blkdev_releasepage(struct page *page, gfp_t wait)
{
	struct super_block *super = BDEV_I(page->mapping->host)->bdev.bd_super;

	if (super && super->s_op->bdev_try_to_free_page)
		return super->s_op->bdev_try_to_free_page(super, page, wait);

	return try_to_free_buffers(page);
}

static const struct address_space_operations def_blk_aops = {
	.readpage	= blkdev_readpage,
	.writepage	= blkdev_writepage,
	.write_begin	= blkdev_write_begin,
	.write_end	= blkdev_write_end,
	.writepages	= generic_writepages,
	.releasepage	= blkdev_releasepage,
	.direct_IO	= blkdev_direct_IO,
	.is_dirty_writeback = buffer_check_dirty_writeback,
};

const struct file_operations def_blk_fops = {
	.open		= blkdev_open,
	.release	= blkdev_close,
	.llseek		= block_llseek,
	.read		= do_sync_read,
	.write		= do_sync_write,
	.aio_read	= blkdev_aio_read,
	.aio_write	= blkdev_aio_write,
	.mmap		= generic_file_mmap,
	.fsync		= blkdev_fsync,
	.unlocked_ioctl	= block_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl	= compat_blkdev_ioctl,
#endif
	.splice_read	= generic_file_splice_read,
	.splice_write	= generic_file_splice_write,
};

int ioctl_by_bdev(struct block_device *bdev, unsigned cmd, unsigned long arg)
{
	int res;
	mm_segment_t old_fs = get_fs();
	set_fs(KERNEL_DS);
	res = blkdev_ioctl(bdev, 0, cmd, arg);
	set_fs(old_fs);
	return res;
}

EXPORT_SYMBOL(ioctl_by_bdev);

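/**
 * lookup_bdev  - lookup a struct block_device by name
 * @pathname:	special file representing the block device
 *
 * Get a reference to the blockdevice at @pathname in the current
 * namespace if possible and return it.  Return ERR_PTR(error)
 * otherwise.
 */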
struct block_device *lookup_bdev(const char *pathname)
{
	struct block_device *bdev;
	struct inode *inode;
	struct path path;
	int error;

	if (!pathname || !*pathname)
		return ERR_PTR(-EINVAL);

	error = kern_path(pathname, LOOKUP_FOLLOW, &path);
	if (error)
		return ERR_PTR(error);

	inode = path.dentry->d_inode;
	error = -ENOTBLK;
	if (!S_ISBLK(inode->i_mode))
		goto fail;
	error = -EACCES;
	if (path.mnt->mnt_flags & MNT_NODEV)
		goto fail;
	error = -ENOMEM;
	bdev = bd_acquire(inode);
	if (!bdev)
		goto fail;
out:
	path_put(&path);
	return bdev;
fail:
	bdev = ERR_PTR(error);
	goto out;
}
EXPORT_SYMBOL(lookup_bdev);

int __invalidate_device(struct block_device *bdev, bool kill_dirty)
{
	struct super_block *sb = get_super(bdev);
	int res = 0;

	if (sb) {
		/*
		 * no need to lock the super, get_super holds the
		 * read mutex so the filesystem cannot go away
		 * under us (->put_super runs with the write lock
		 * held).
		 */
		shrink_dcache_sb(sb);
		res = invalidate_inodes(sb, kill_dirty);
		drop_super(sb);
	}
	invalidate_bdev(bdev);
	return res;
}
EXPORT_SYMBOL(__invalidate_device);

void iterate_bdevs(void (*func)(struct block_device *, void *), void *arg)
{
	struct inode *inode, *old_inode = NULL;

	spin_lock(&inode_sb_list_lock);
	list_for_each_entry(inode, &blockdev_superblock->s_inodes, i_sb_list) {
		struct address_space *mapping = inode->i_mapping;

		spin_lock(&inode->i_lock);
		if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW) ||
		    mapping->nrpages == 0) {
			spin_unlock(&inode->i_lock);
			continue;
		}
		__iget(inode);
		spin_unlock(&inode->i_lock);
		spin_unlock(&inode_sb_list_lock);
		/*
		 * We hold a reference to 'inode' so it couldn't have been
		 * removed from s_inodes list while we dropped the
		 * inode_sb_list_lock.  We cannot iput the inode now as we can
		 * be holding the last reference and we cannot iput it under
		 * inode_sb_list_lock. So we keep the reference and iput it
		 * later.
		 */
		iput(old_inode);
		old_inode = inode;

		func(I_BDEV(inode), arg);

		spin_lock(&inode_sb_list_lock);
	}
	spin_unlock(&inode_sb_list_lock);
	iput(old_inode);
}