/*
 *  linux/fs/block_dev.c
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 *  Copyright (C) 2001  Andrea Arcangeli <andrea@suse.de> SuSE
 */
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/fcntl.h>
#include <linux/slab.h>
#include <linux/kmod.h>
#include <linux/major.h>
#include <linux/device_cgroup.h>
#include <linux/highmem.h>
#include <linux/blkdev.h>
#include <linux/backing-dev.h>
#include <linux/module.h>
#include <linux/blkpg.h>
#include <linux/magic.h>
#include <linux/dax.h>
#include <linux/buffer_head.h>
#include <linux/swap.h>
#include <linux/pagevec.h>
#include <linux/writeback.h>
#include <linux/mpage.h>
#include <linux/mount.h>
#include <linux/uio.h>
#include <linux/namei.h>
#include <linux/log2.h>
#include <linux/cleancache.h>
#include <linux/badblocks.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/falloc.h>
#include <linux/uaccess.h>
#include "internal.h"

struct bdev_inode {
	struct block_device bdev;
	struct inode vfs_inode;
};

static const struct address_space_operations def_blk_aops;

static inline struct bdev_inode *BDEV_I(struct inode *inode)
{
	return container_of(inode, struct bdev_inode, vfs_inode);
}

struct block_device *I_BDEV(struct inode *inode)
{
	return &BDEV_I(inode)->bdev;
}
EXPORT_SYMBOL(I_BDEV);

void __vfs_msg(struct super_block *sb, const char *prefix, const char *fmt, ...)
{
	struct va_format vaf;
	va_list args;

	va_start(args, fmt);
	vaf.fmt = fmt;
	vaf.va = &args;
	printk_ratelimited("%sVFS (%s): %pV\n", prefix, sb->s_id, &vaf);
	va_end(args);
}

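/*
 * Write the dirty state of the bdev inode back to disk.  Writeback can
 * redirty the inode while we are flushing it, so loop under i_lock until
 * I_DIRTY is clear.
 */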
static void bdev_write_inode(struct block_device *bdev)
{
	struct inode *inode = bdev->bd_inode;
	int ret;

	spin_lock(&inode->i_lock);
	while (inode->i_state & I_DIRTY) {
		spin_unlock(&inode->i_lock);
		ret = write_inode_now(inode, true);
		if (ret) {
			char name[BDEVNAME_SIZE];
			pr_warn_ratelimited("VFS: Dirty inode writeback failed "
					    "for block device %s (err=%d).\n",
					    bdevname(bdev, name), ret);
		}
		spin_lock(&inode->i_lock);
	}
	spin_unlock(&inode->i_lock);
}

/* Kill _all_ buffers and pagecache, dirty or not.. */
void kill_bdev(struct block_device *bdev)
{
	struct address_space *mapping = bdev->bd_inode->i_mapping;

	if (mapping->nrpages == 0 && mapping->nrexceptional == 0)
		return;

	invalidate_bh_lrus();
	truncate_inode_pages(mapping, 0);
}
EXPORT_SYMBOL(kill_bdev);

/* Invalidate clean unused buffers and pagecache. */
void invalidate_bdev(struct block_device *bdev)
{
	struct address_space *mapping = bdev->bd_inode->i_mapping;

	if (mapping->nrpages) {
		invalidate_bh_lrus();
		lru_add_drain_all();	/* make sure all lru add caches are flushed */
		invalidate_mapping_pages(mapping, 0, -1);
	}
	/* 99% of the time, we don't need to flush the cleancache on the bdev.
	 * But, for the strange corners, lets be cautious
	 */
	cleancache_invalidate_inode(mapping);
}
EXPORT_SYMBOL(invalidate_bdev);

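/*
 * Set the block size used for buffered I/O through this device's page
 * cache.  The size must be a power of two between 512 and PAGE_SIZE, and
 * no smaller than the device's logical block size.
 */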
int set_blocksize(struct block_device *bdev, int size)
{
	/* Size must be a power of two, and between 512 and PAGE_SIZE */
	if (size > PAGE_SIZE || size < 512 || !is_power_of_2(size))
		return -EINVAL;

	/* Size cannot be smaller than the size supported by the device */
	if (size < bdev_logical_block_size(bdev))
		return -EINVAL;

	/* Don't change the size if it is same as current */
	if (bdev->bd_block_size != size) {
		sync_blockdev(bdev);
		bdev->bd_block_size = size;
		bdev->bd_inode->i_blkbits = blksize_bits(size);
		kill_bdev(bdev);
	}
	return 0;
}

EXPORT_SYMBOL(set_blocksize);

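/*
 * Set both the block device's and the superblock's notion of the block
 * size.  Returns the new size on success, or 0 if set_blocksize()
 * rejected it.
 */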
int sb_set_blocksize(struct super_block *sb, int size)
{
	if (set_blocksize(sb->s_bdev, size))
		return 0;
	/* If we get here, we know size is power of two
	 * and it's value is between 512 and PAGE_SIZE */
	sb->s_blocksize = size;
	sb->s_blocksize_bits = blksize_bits(size);
	return sb->s_blocksize;
}

EXPORT_SYMBOL(sb_set_blocksize);

int sb_min_blocksize(struct super_block *sb, int size)
{
	int minsize = bdev_logical_block_size(sb->s_bdev);
	if (size < minsize)
		size = minsize;
	return sb_set_blocksize(sb, size);
}

EXPORT_SYMBOL(sb_min_blocksize);

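/*
 * Trivial get_block for block devices: the mapping is the identity, so
 * block iblock of the "file" is block iblock of the device and there is
 * nothing to look up or allocate.
 */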
static int
blkdev_get_block(struct inode *inode, sector_t iblock,
		struct buffer_head *bh, int create)
{
	bh->b_bdev = I_BDEV(inode);
	bh->b_blocknr = iblock;
	set_buffer_mapped(bh);
	return 0;
}

static struct inode *bdev_file_inode(struct file *file)
{
	return file->f_mapping->host;
}

static unsigned int dio_bio_write_op(struct kiocb *iocb)
{
	unsigned int op = REQ_OP_WRITE | REQ_SYNC | REQ_IDLE;

	/* avoid the need for a I/O completion work item */
	if (iocb->ki_flags & IOCB_DSYNC)
		op |= REQ_FUA;
	return op;
}

#define DIO_INLINE_BIO_VECS 4

static void blkdev_bio_end_io_simple(struct bio *bio)
{
	struct task_struct *waiter = bio->bi_private;

	WRITE_ONCE(bio->bi_private, NULL);
	wake_up_process(waiter);
}

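/*
 * Fast path for small synchronous direct I/O: a single on-stack bio
 * (with up to DIO_INLINE_BIO_VECS inline bio_vecs), completed by waking
 * the submitting task from the bio's end_io handler.
 */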
static ssize_t
__blkdev_direct_IO_simple(struct kiocb *iocb, struct iov_iter *iter,
		int nr_pages)
{
	struct file *file = iocb->ki_filp;
	struct block_device *bdev = I_BDEV(bdev_file_inode(file));
	struct bio_vec inline_vecs[DIO_INLINE_BIO_VECS], *vecs, *bvec;
	loff_t pos = iocb->ki_pos;
	bool should_dirty = false;
	struct bio bio;
	ssize_t ret;
	blk_qc_t qc;
	int i;

	if ((pos | iov_iter_alignment(iter)) &
	    (bdev_logical_block_size(bdev) - 1))
		return -EINVAL;

	if (nr_pages <= DIO_INLINE_BIO_VECS)
		vecs = inline_vecs;
	else {
		vecs = kmalloc(nr_pages * sizeof(struct bio_vec), GFP_KERNEL);
		if (!vecs)
			return -ENOMEM;
	}

	bio_init(&bio, vecs, nr_pages);
	bio_set_dev(&bio, bdev);
	bio.bi_iter.bi_sector = pos >> 9;
	bio.bi_write_hint = iocb->ki_hint;
	bio.bi_private = current;
	bio.bi_end_io = blkdev_bio_end_io_simple;

	ret = bio_iov_iter_get_pages(&bio, iter);
	if (unlikely(ret))
		goto out;	/* don't leak a kmalloc'ed vecs array */
	ret = bio.bi_iter.bi_size;

	if (iov_iter_rw(iter) == READ) {
		bio.bi_opf = REQ_OP_READ;
		if (iter_is_iovec(iter))
			should_dirty = true;
	} else {
		bio.bi_opf = dio_bio_write_op(iocb);
		task_io_account_write(ret);
	}

	qc = submit_bio(&bio);
	for (;;) {
		set_current_state(TASK_UNINTERRUPTIBLE);
		if (!READ_ONCE(bio.bi_private))
			break;
		if (!(iocb->ki_flags & IOCB_HIPRI) ||
		    !blk_mq_poll(bdev_get_queue(bdev), qc))
			io_schedule();
	}
	__set_current_state(TASK_RUNNING);

	bio_for_each_segment_all(bvec, &bio, i) {
		if (should_dirty && !PageCompound(bvec->bv_page))
			set_page_dirty_lock(bvec->bv_page);
		put_page(bvec->bv_page);
	}

	if (unlikely(bio.bi_status))
		ret = blk_status_to_errno(bio.bi_status);

out:
	if (vecs != inline_vecs)
		kfree(vecs);

	bio_uninit(&bio);

	return ret;
}

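/*
 * Per-call state for the general direct I/O path.  The first bio is
 * embedded so a single-bio request needs no extra allocation; for sync
 * requests the union holds the submitting task, for async ones the iocb
 * to complete.
 */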
struct blkdev_dio {
	union {
		struct kiocb		*iocb;
		struct task_struct	*waiter;
	};
	size_t			size;
	atomic_t		ref;
	bool			multi_bio : 1;
	bool			should_dirty : 1;
	bool			is_sync : 1;
	struct bio		bio;
};

static struct bio_set *blkdev_dio_pool __read_mostly;

static void blkdev_bio_end_io(struct bio *bio)
{
	struct blkdev_dio *dio = bio->bi_private;
	bool should_dirty = dio->should_dirty;

	if (dio->multi_bio && !atomic_dec_and_test(&dio->ref)) {
		if (bio->bi_status && !dio->bio.bi_status)
			dio->bio.bi_status = bio->bi_status;
	} else {
		if (!dio->is_sync) {
			struct kiocb *iocb = dio->iocb;
			ssize_t ret;

			if (likely(!dio->bio.bi_status)) {
				ret = dio->size;
				iocb->ki_pos += ret;
			} else {
				ret = blk_status_to_errno(dio->bio.bi_status);
			}

			dio->iocb->ki_complete(iocb, ret, 0);
			bio_put(&dio->bio);
		} else {
			struct task_struct *waiter = dio->waiter;

			WRITE_ONCE(dio->waiter, NULL);
			wake_up_process(waiter);
		}
	}

	if (should_dirty) {
		bio_check_pages_dirty(bio);
	} else {
		struct bio_vec *bvec;
		int i;

		bio_for_each_segment_all(bvec, bio, i)
			put_page(bvec->bv_page);
		bio_put(bio);
	}
}

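/*
 * General direct I/O path: split the request into as many bios as needed
 * and submit them under a plug.  Async requests return -EIOCBQUEUED and
 * complete from blkdev_bio_end_io(); sync requests wait here, optionally
 * polling the queue for IOCB_HIPRI.
 */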
static ssize_t
__blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages)
{
	struct file *file = iocb->ki_filp;
	struct inode *inode = bdev_file_inode(file);
	struct block_device *bdev = I_BDEV(inode);
	struct blk_plug plug;
	struct blkdev_dio *dio;
	struct bio *bio;
	bool is_read = (iov_iter_rw(iter) == READ), is_sync;
	loff_t pos = iocb->ki_pos;
	blk_qc_t qc = BLK_QC_T_NONE;
	int ret = 0;

	if ((pos | iov_iter_alignment(iter)) &
	    (bdev_logical_block_size(bdev) - 1))
		return -EINVAL;

	bio = bio_alloc_bioset(GFP_KERNEL, nr_pages, blkdev_dio_pool);
	bio_get(bio);

	dio = container_of(bio, struct blkdev_dio, bio);
	dio->is_sync = is_sync = is_sync_kiocb(iocb);
	if (dio->is_sync)
		dio->waiter = current;
	else
		dio->iocb = iocb;

	dio->size = 0;
	dio->multi_bio = false;
	dio->should_dirty = is_read && (iter->type == ITER_IOVEC);

	blk_start_plug(&plug);
	for (;;) {
		bio_set_dev(bio, bdev);
		bio->bi_iter.bi_sector = pos >> 9;
		bio->bi_write_hint = iocb->ki_hint;
		bio->bi_private = dio;
		bio->bi_end_io = blkdev_bio_end_io;

		ret = bio_iov_iter_get_pages(bio, iter);
		if (unlikely(ret)) {
			bio->bi_status = BLK_STS_IOERR;
			bio_endio(bio);
			break;
		}

		if (is_read) {
			bio->bi_opf = REQ_OP_READ;
			if (dio->should_dirty)
				bio_set_pages_dirty(bio);
		} else {
			bio->bi_opf = dio_bio_write_op(iocb);
			task_io_account_write(bio->bi_iter.bi_size);
		}

		dio->size += bio->bi_iter.bi_size;
		pos += bio->bi_iter.bi_size;

		nr_pages = iov_iter_npages(iter, BIO_MAX_PAGES);
		if (!nr_pages) {
			qc = submit_bio(bio);
			break;
		}

		if (!dio->multi_bio) {
			dio->multi_bio = true;
			atomic_set(&dio->ref, 2);
		} else {
			atomic_inc(&dio->ref);
		}

		submit_bio(bio);
		bio = bio_alloc(GFP_KERNEL, nr_pages);
	}
	blk_finish_plug(&plug);

	if (!is_sync)
		return -EIOCBQUEUED;

	for (;;) {
		set_current_state(TASK_UNINTERRUPTIBLE);
		if (!READ_ONCE(dio->waiter))
			break;

		if (!(iocb->ki_flags & IOCB_HIPRI) ||
		    !blk_mq_poll(bdev_get_queue(bdev), qc))
			io_schedule();
	}
	__set_current_state(TASK_RUNNING);

	if (!ret)
		ret = blk_status_to_errno(dio->bio.bi_status);
	if (likely(!ret))
		ret = dio->size;

	bio_put(&dio->bio);
	return ret;
}

static ssize_t
blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
{
	int nr_pages;

	nr_pages = iov_iter_npages(iter, BIO_MAX_PAGES + 1);
	if (!nr_pages)
		return 0;
	if (is_sync_kiocb(iocb) && nr_pages <= BIO_MAX_PAGES)
		return __blkdev_direct_IO_simple(iocb, iter, nr_pages);

	return __blkdev_direct_IO(iocb, iter, min(nr_pages, BIO_MAX_PAGES));
}

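/*
 * The dio pool front-pads each bio with the enclosing struct blkdev_dio,
 * hence the offsetof() for the bioset's front_pad.
 */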
static __init int blkdev_init(void)
{
	blkdev_dio_pool = bioset_create(4, offsetof(struct blkdev_dio, bio),
					BIOSET_NEED_BVECS);
	if (!blkdev_dio_pool)
		return -ENOMEM;
	return 0;
}
module_init(blkdev_init);

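/* Write out the dirty pagecache of a block device, optionally waiting. */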
int __sync_blockdev(struct block_device *bdev, int wait)
{
	if (!bdev)
		return 0;
	if (!wait)
		return filemap_flush(bdev->bd_inode->i_mapping);
	return filemap_write_and_wait(bdev->bd_inode->i_mapping);
}

/*
 * Write out and wait upon all the dirty data associated with a block
 * device via its mapping.  Does not take the superblock lock.
 */
int sync_blockdev(struct block_device *bdev)
{
	return __sync_blockdev(bdev, 1);
}
EXPORT_SYMBOL(sync_blockdev);

/*
 * Write out and wait upon all dirty data associated with this
 * device.  Filesystem data as well as the underlying block
 * device.  Takes the superblock lock.
 */
int fsync_bdev(struct block_device *bdev)
{
	struct super_block *sb = get_super(bdev);
	if (sb) {
		int res = sync_filesystem(sb);
		drop_super(sb);
		return res;
	}
	return sync_blockdev(bdev);
}
EXPORT_SYMBOL(fsync_bdev);

/**
 * freeze_bdev  --  lock a filesystem and force it into a consistent state
 * @bdev:	blockdevice to lock
 *
 * If a superblock is found on this device, we take the s_umount semaphore
 * on it to make sure nobody unmounts until the snapshot creation is done.
 * The reference counter (bd_fsfreeze_count) guarantees that only the last
 * unfreeze process can unfreeze the frozen filesystem actually when multiple
 * freeze requests arrive simultaneously. It counts up in freeze_bdev() and
 * count down in thaw_bdev(). When it becomes 0, thaw_bdev() will unfreeze
 * actually.
 */
struct super_block *freeze_bdev(struct block_device *bdev)
{
	struct super_block *sb;
	int error = 0;

	mutex_lock(&bdev->bd_fsfreeze_mutex);
	if (++bdev->bd_fsfreeze_count > 1) {
		/*
		 * We don't even need to grab a reference - the first call
		 * to freeze_bdev grabs an active reference and only the last
		 * thaw_bdev drops it.
		 */
		sb = get_super(bdev);
		if (sb)
			drop_super(sb);
		mutex_unlock(&bdev->bd_fsfreeze_mutex);
		return sb;
	}

	sb = get_active_super(bdev);
	if (!sb)
		goto out;
	if (sb->s_op->freeze_super)
		error = sb->s_op->freeze_super(sb);
	else
		error = freeze_super(sb);
	if (error) {
		deactivate_super(sb);
		bdev->bd_fsfreeze_count--;
		mutex_unlock(&bdev->bd_fsfreeze_mutex);
		return ERR_PTR(error);
	}
	deactivate_super(sb);
 out:
	sync_blockdev(bdev);
	mutex_unlock(&bdev->bd_fsfreeze_mutex);
	return sb;
}
EXPORT_SYMBOL(freeze_bdev);

/**
 * thaw_bdev  -- unlock filesystem
 * @bdev:	blockdevice to unlock
 * @sb:		associated superblock
 *
 * Unlocks the filesystem and marks it writeable again after freeze_bdev().
 */
int thaw_bdev(struct block_device *bdev, struct super_block *sb)
{
	int error = -EINVAL;

	mutex_lock(&bdev->bd_fsfreeze_mutex);
	if (!bdev->bd_fsfreeze_count)
		goto out;

	error = 0;
	if (--bdev->bd_fsfreeze_count > 0)
		goto out;

	if (!sb)
		goto out;

	if (sb->s_op->thaw_super)
		error = sb->s_op->thaw_super(sb);
	else
		error = thaw_super(sb);
	if (error)
		bdev->bd_fsfreeze_count++;
out:
	mutex_unlock(&bdev->bd_fsfreeze_mutex);
	return error;
}
EXPORT_SYMBOL(thaw_bdev);

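/*
 * Buffered I/O on a block device goes through the generic buffer_head /
 * mpage helpers below, with blkdev_get_block() supplying the trivial
 * identity mapping.
 */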
static int blkdev_writepage(struct page *page, struct writeback_control *wbc)
{
	return block_write_full_page(page, blkdev_get_block, wbc);
}

static int blkdev_readpage(struct file * file, struct page * page)
{
	return block_read_full_page(page, blkdev_get_block);
}

static int blkdev_readpages(struct file *file, struct address_space *mapping,
			struct list_head *pages, unsigned nr_pages)
{
	return mpage_readpages(mapping, pages, nr_pages, blkdev_get_block);
}

static int blkdev_write_begin(struct file *file, struct address_space *mapping,
			loff_t pos, unsigned len, unsigned flags,
			struct page **pagep, void **fsdata)
{
	return block_write_begin(mapping, pos, len, flags, pagep,
				 blkdev_get_block);
}

static int blkdev_write_end(struct file *file, struct address_space *mapping,
			loff_t pos, unsigned len, unsigned copied,
			struct page *page, void *fsdata)
{
	int ret;
	ret = block_write_end(file, mapping, pos, len, copied, page, fsdata);

	unlock_page(page);
	put_page(page);

	return ret;
}

/*
 * for a block special file file_inode(file)->i_size is zero
 * so we compute the size by hand (just as in block_read/write above)
 */
static loff_t block_llseek(struct file *file, loff_t offset, int whence)
{
	struct inode *bd_inode = bdev_file_inode(file);
	loff_t retval;

	inode_lock(bd_inode);
	retval = fixed_size_llseek(file, offset, whence, i_size_read(bd_inode));
	inode_unlock(bd_inode);
	return retval;
}

int blkdev_fsync(struct file *filp, loff_t start, loff_t end, int datasync)
{
	struct inode *bd_inode = bdev_file_inode(filp);
	struct block_device *bdev = I_BDEV(bd_inode);
	int error;

	error = file_write_and_wait_range(filp, start, end);
	if (error)
		return error;

	/*
	 * There is no need to serialise calls to blkdev_issue_flush with
	 * i_mutex and doing so causes performance issues with concurrent
	 * O_SYNC writers to a block device.
	 */
	error = blkdev_issue_flush(bdev, GFP_KERNEL, NULL);
	if (error == -EOPNOTSUPP)
		error = 0;

	return error;
}
EXPORT_SYMBOL(blkdev_fsync);

/**
 * bdev_read_page() - Start reading a page from a block device
 * @bdev: The device to read the page from
 * @sector: The offset on the device to read the page to (need not be aligned)
 * @page: The page to read
 *
 * On entry, the page should be locked.  It will be unlocked when the page
 * has been read.  If the block driver implements rw_page synchronously,
 * that will be true on exit from this function, but it need not be.
 *
 * Errors returned by this function are usually "soft", eg out of memory, or
 * queue full; callers should try a different route to read this page rather
 * than propagate an error back up the stack.
 *
 * Return: negative errno if an error occurs, 0 if submission was successful.
 */
int bdev_read_page(struct block_device *bdev, sector_t sector,
			struct page *page)
{
	const struct block_device_operations *ops = bdev->bd_disk->fops;
	int result = -EOPNOTSUPP;

	if (!ops->rw_page || bdev_get_integrity(bdev))
		return result;

	result = blk_queue_enter(bdev->bd_queue, false);
	if (result)
		return result;
	result = ops->rw_page(bdev, sector + get_start_sect(bdev), page, false);
	blk_queue_exit(bdev->bd_queue);
	return result;
}
EXPORT_SYMBOL_GPL(bdev_read_page);

/**
 * bdev_write_page() - Start writing a page to a block device
 * @bdev: The device to write the page to
 * @sector: The offset on the device to write the page to (need not be aligned)
 * @page: The page to write
 * @wbc: The writeback_control for the write
 *
 * On entry, the page should be locked and not currently under writeback.
 * On exit, if the write started successfully, the page will be unlocked and
 * under writeback.  If the write failed already (eg the driver failed to
 * queue the page to the device), the page will still be locked.  If the
 * caller is a ->writepage implementation, it will need to unlock the page.
 *
 * Errors returned by this function are usually "soft", eg out of memory, or
 * queue full; callers should try a different route to write this page rather
 * than propagate an error back up the stack.
 *
 * Return: negative errno if an error occurs, 0 if submission was successful.
 */
int bdev_write_page(struct block_device *bdev, sector_t sector,
			struct page *page, struct writeback_control *wbc)
{
	int result;
	const struct block_device_operations *ops = bdev->bd_disk->fops;

	if (!ops->rw_page || bdev_get_integrity(bdev))
		return -EOPNOTSUPP;
	result = blk_queue_enter(bdev->bd_queue, false);
	if (result)
		return result;

	set_page_writeback(page);
	result = ops->rw_page(bdev, sector + get_start_sect(bdev), page, true);
	if (result) {
		end_page_writeback(page);
	} else {
		clean_page_buffers(page);
		unlock_page(page);
	}
	blk_queue_exit(bdev->bd_queue);
	return result;
}
EXPORT_SYMBOL_GPL(bdev_write_page);

/*
 * pseudo-fs
 */
static __cacheline_aligned_in_smp DEFINE_SPINLOCK(bdev_lock);
static struct kmem_cache * bdev_cachep __read_mostly;

static struct inode *bdev_alloc_inode(struct super_block *sb)
{
	struct bdev_inode *ei = kmem_cache_alloc(bdev_cachep, GFP_KERNEL);
	if (!ei)
		return NULL;
	return &ei->vfs_inode;
}

static void bdev_i_callback(struct rcu_head *head)
{
	struct inode *inode = container_of(head, struct inode, i_rcu);
	struct bdev_inode *bdi = BDEV_I(inode);

	kmem_cache_free(bdev_cachep, bdi);
}

static void bdev_destroy_inode(struct inode *inode)
{
	call_rcu(&inode->i_rcu, bdev_i_callback);
}

static void init_once(void *foo)
{
	struct bdev_inode *ei = (struct bdev_inode *) foo;
	struct block_device *bdev = &ei->bdev;

	memset(bdev, 0, sizeof(*bdev));
	mutex_init(&bdev->bd_mutex);
	INIT_LIST_HEAD(&bdev->bd_list);
#ifdef CONFIG_SYSFS
	INIT_LIST_HEAD(&bdev->bd_holder_disks);
#endif
	bdev->bd_bdi = &noop_backing_dev_info;
	inode_init_once(&ei->vfs_inode);
	/* Initialize mutex for freeze. */
	mutex_init(&bdev->bd_fsfreeze_mutex);
}

static void bdev_evict_inode(struct inode *inode)
{
	struct block_device *bdev = &BDEV_I(inode)->bdev;
	truncate_inode_pages_final(&inode->i_data);
	invalidate_inode_buffers(inode);
	clear_inode(inode);
	spin_lock(&bdev_lock);
	list_del_init(&bdev->bd_list);
	spin_unlock(&bdev_lock);
	/* Detach inode from wb early as bdi_put() may free bdi->wb */
	inode_detach_wb(inode);
	if (bdev->bd_bdi != &noop_backing_dev_info) {
		bdi_put(bdev->bd_bdi);
		bdev->bd_bdi = &noop_backing_dev_info;
	}
}

static const struct super_operations bdev_sops = {
	.statfs = simple_statfs,
	.alloc_inode = bdev_alloc_inode,
	.destroy_inode = bdev_destroy_inode,
	.drop_inode = generic_delete_inode,
	.evict_inode = bdev_evict_inode,
};

static struct dentry *bd_mount(struct file_system_type *fs_type,
	int flags, const char *dev_name, void *data)
{
	struct dentry *dent;
	dent = mount_pseudo(fs_type, "bdev:", &bdev_sops, NULL, BDEVFS_MAGIC);
	if (!IS_ERR(dent))
		dent->d_sb->s_iflags |= SB_I_CGROUPWB;
	return dent;
}

static struct file_system_type bd_type = {
	.name		= "bdev",
	.mount		= bd_mount,
	.kill_sb	= kill_anon_super,
};

struct super_block *blockdev_superblock __read_mostly;
EXPORT_SYMBOL_GPL(blockdev_superblock);

void __init bdev_cache_init(void)
{
	int err;
	static struct vfsmount *bd_mnt;

	bdev_cachep = kmem_cache_create("bdev_cache", sizeof(struct bdev_inode),
			0, (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|
				SLAB_MEM_SPREAD|SLAB_ACCOUNT|SLAB_PANIC),
			init_once);
	err = register_filesystem(&bd_type);
	if (err)
		panic("Cannot register bdev pseudo-fs");
	bd_mnt = kern_mount(&bd_type);
	if (IS_ERR(bd_mnt))
		panic("Cannot create bdev pseudo-fs");
	blockdev_superblock = bd_mnt->mnt_sb;
}

/*
 * Most likely _very_ bad one - but then it's hardly critical for small
 * /dev and can be fixed when somebody will need really large one.
 * Keep in mind that it will be fed through icache hash function too.
 */
static inline unsigned long hash(dev_t dev)
{
	return MAJOR(dev)+MINOR(dev);
}

static int bdev_test(struct inode *inode, void *data)
{
	return BDEV_I(inode)->bdev.bd_dev == *(dev_t *)data;
}

static int bdev_set(struct inode *inode, void *data)
{
	BDEV_I(inode)->bdev.bd_dev = *(dev_t *)data;
	return 0;
}

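/* All block-device inodes, linked through bd_list; protected by bdev_lock. */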
static LIST_HEAD(all_bdevs);

/*
 * If there is a bdev inode for this device, unhash it so that it gets evicted
 * as soon as last inode reference is dropped.
 */
void bdev_unhash_inode(dev_t dev)
{
	struct inode *inode;

	inode = ilookup5(blockdev_superblock, hash(dev), bdev_test, &dev);
	if (inode) {
		remove_inode_hash(inode);
		iput(inode);
	}
}

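/*
 * Look up (and, on first use, initialise) the block-device inode for a
 * given device number in the bdev pseudo-filesystem.
 */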
struct block_device *bdget(dev_t dev)
{
	struct block_device *bdev;
	struct inode *inode;

	inode = iget5_locked(blockdev_superblock, hash(dev),
			bdev_test, bdev_set, &dev);

	if (!inode)
		return NULL;

	bdev = &BDEV_I(inode)->bdev;

	if (inode->i_state & I_NEW) {
		bdev->bd_contains = NULL;
		bdev->bd_super = NULL;
		bdev->bd_inode = inode;
		bdev->bd_block_size = i_blocksize(inode);
		bdev->bd_part_count = 0;
		bdev->bd_invalidated = 0;
		inode->i_mode = S_IFBLK;
		inode->i_rdev = dev;
		inode->i_bdev = bdev;
		inode->i_data.a_ops = &def_blk_aops;
		mapping_set_gfp_mask(&inode->i_data, GFP_USER);
		spin_lock(&bdev_lock);
		list_add(&bdev->bd_list, &all_bdevs);
		spin_unlock(&bdev_lock);
		unlock_new_inode(inode);
	}
	return bdev;
}

EXPORT_SYMBOL(bdget);

/**
 * bdgrab -- Grab a reference to an already referenced block device
 * @bdev:	Block device to grab a reference to.
 */
struct block_device *bdgrab(struct block_device *bdev)
{
	ihold(bdev->bd_inode);
	return bdev;
}
EXPORT_SYMBOL(bdgrab);

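/* Total number of pagecache pages held by all block-device inodes. */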
long nr_blockdev_pages(void)
{
	struct block_device *bdev;
	long ret = 0;
	spin_lock(&bdev_lock);
	list_for_each_entry(bdev, &all_bdevs, bd_list) {
		ret += bdev->bd_inode->i_mapping->nrpages;
	}
	spin_unlock(&bdev_lock);
	return ret;
}

void bdput(struct block_device *bdev)
{
	iput(bdev->bd_inode);
}

EXPORT_SYMBOL(bdput);

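/*
 * Resolve the block device for an inode of a block special file, taking a
 * reference on it.  Reuses inode->i_bdev if it is still hashed, otherwise
 * looks the device up again by inode->i_rdev.
 */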
static struct block_device *bd_acquire(struct inode *inode)
{
	struct block_device *bdev;

	spin_lock(&bdev_lock);
	bdev = inode->i_bdev;
	if (bdev && !inode_unhashed(bdev->bd_inode)) {
		bdgrab(bdev);
		spin_unlock(&bdev_lock);
		return bdev;
	}
	spin_unlock(&bdev_lock);

	/*
	 * i_bdev references block device inode that was already shut down
	 * (corresponding device got removed).  Remove the reference and look
	 * up block device inode again just in case new device got
	 * reestablished under the same device number.
	 */
	if (bdev)
		bd_forget(inode);

	bdev = bdget(inode->i_rdev);
	if (bdev) {
		spin_lock(&bdev_lock);
		if (!inode->i_bdev) {
			/*
			 * We take an additional reference to bd_inode,
			 * and it's released in clear_inode() of inode.
			 * So, we can access it via ->i_mapping always
			 * without igrab().
			 */
			bdgrab(bdev);
			inode->i_bdev = bdev;
			inode->i_mapping = bdev->bd_inode->i_mapping;
		}
		spin_unlock(&bdev_lock);
	}
	return bdev;
}

/* Call when you free inode */
void bd_forget(struct inode *inode)
{
	struct block_device *bdev = NULL;

	spin_lock(&bdev_lock);
	if (!sb_is_blkdev_sb(inode->i_sb))
		bdev = inode->i_bdev;
	inode->i_bdev = NULL;
	inode->i_mapping = &inode->i_data;
	spin_unlock(&bdev_lock);

	if (bdev)
		bdput(bdev);
}

/**
 * bd_may_claim - test whether a block device can be claimed
 * @bdev: block device of interest
 * @whole: whole block device containing @bdev, may equal @bdev
 * @holder: holder trying to claim @bdev
 *
 * Test whether @bdev can be claimed by @holder.
 *
 * CONTEXT:
 * spin_lock(&bdev_lock).
 *
 * RETURNS:
 * %true if @bdev can be claimed, %false otherwise.
 */
static bool bd_may_claim(struct block_device *bdev, struct block_device *whole,
			 void *holder)
{
	if (bdev->bd_holder == holder)
		return true;	 /* already a holder */
	else if (bdev->bd_holder != NULL)
		return false;	 /* held by someone else */
	else if (whole == bdev)
		return true;	 /* is a whole device which isn't held */

	else if (whole->bd_holder == bd_may_claim)
		return true;	 /* is a partition of a device that is being partitioned */
	else if (whole->bd_holder != NULL)
		return false;	 /* is a partition of a held device */
	else
		return true;	 /* is a partition of an un-held device */
}

/**
 * bd_prepare_to_claim - prepare to claim a block device
 * @bdev: block device of interest
 * @whole: the whole device containing @bdev, may equal @bdev
 * @holder: holder trying to claim @bdev
 *
 * Prepare to claim @bdev.  This function fails if @bdev is already
 * claimed by another holder and waits if another claiming is in
 * progress. This function doesn't actually claim.  On successful
 * return, the caller has ownership of bd_claiming and bd_holder[s].
 *
 * CONTEXT:
 * spin_lock(&bdev_lock).  Might release bdev_lock, sleep and regrab
 * it multiple times.
 *
 * RETURNS:
 * 0 if @bdev can be claimed, -EBUSY otherwise.
 */
static int bd_prepare_to_claim(struct block_device *bdev,
			       struct block_device *whole, void *holder)
{
retry:
	/* if someone else claimed, fail */
	if (!bd_may_claim(bdev, whole, holder))
		return -EBUSY;

	/* if claiming is already in progress, wait for it to finish */
	if (whole->bd_claiming) {
		wait_queue_head_t *wq = bit_waitqueue(&whole->bd_claiming, 0);
		DEFINE_WAIT(wait);

		prepare_to_wait(wq, &wait, TASK_UNINTERRUPTIBLE);
		spin_unlock(&bdev_lock);
		schedule();
		finish_wait(wq, &wait);
		spin_lock(&bdev_lock);
		goto retry;
	}

	/* yay, all mine */
	return 0;
}

/**
 * bd_start_claiming - start claiming a block device
 * @bdev: block device of interest
 * @holder: holder trying to claim @bdev
 *
 * @bdev is about to be opened exclusively.  Check @bdev can be opened
 * exclusively and mark that an exclusive open is in progress.  Each
 * successful call to this function must be matched with a call that
 * finishes or aborts the claim.
 *
 * This function is used to gain exclusive access to the block device
 * without actually causing other exclusive open attempts to fail. It
 * should be used when the open sequence itself requires exclusive
 * access but may subsequently fail.
 *
 * CONTEXT:
 * Might sleep.
 *
 * RETURNS:
 * Pointer to the block device containing @bdev on success, ERR_PTR()
 * value on failure.
 */
static struct block_device *bd_start_claiming(struct block_device *bdev,
					      void *holder)
{
	struct gendisk *disk;
	struct block_device *whole;
	int partno, err;

	might_sleep();

	/*
	 * @bdev might not have been initialized properly yet, look up
	 * and grab the outer block device the hard way.
	 */
	disk = get_gendisk(bdev->bd_dev, &partno);
	if (!disk)
		return ERR_PTR(-ENXIO);

	/*
	 * Normally, @bdev should equal what's returned from bdget_disk()
	 * if partno is 0; however, some drivers (floppy) use multiple
	 * bdev's for the same physical device and @bdev may be one of the
	 * aliases.  Keep @bdev if partno is 0.  This means claimer
	 * tracking is broken for those devices but it has always been that
	 * way.
	 */
	if (partno)
		whole = bdget_disk(disk, 0);
	else
		whole = bdgrab(bdev);

	module_put(disk->fops->owner);
	put_disk(disk);
	if (!whole)
		return ERR_PTR(-ENOMEM);

	/* prepare to claim, if successful, mark claiming in progress */
	spin_lock(&bdev_lock);

	err = bd_prepare_to_claim(bdev, whole, holder);
	if (err == 0) {
		whole->bd_claiming = holder;
		spin_unlock(&bdev_lock);
		return whole;
	} else {
		spin_unlock(&bdev_lock);
		bdput(whole);
		return ERR_PTR(err);
	}
}

#ifdef CONFIG_SYSFS
struct bd_holder_disk {
	struct list_head	list;
	struct gendisk		*disk;
	int			refcnt;
};

static struct bd_holder_disk *bd_find_holder_disk(struct block_device *bdev,
						  struct gendisk *disk)
{
	struct bd_holder_disk *holder;

	list_for_each_entry(holder, &bdev->bd_holder_disks, list)
		if (holder->disk == disk)
			return holder;
	return NULL;
}

static int add_symlink(struct kobject *from, struct kobject *to)
{
	return sysfs_create_link(from, to, kobject_name(to));
}

static void del_symlink(struct kobject *from, struct kobject *to)
{
	sysfs_remove_link(from, kobject_name(to));
}

/**
 * bd_link_disk_holder - create symlinks between holding disk and slave bdev
 * @bdev: the claimed slave bdev
 * @disk: the holding disk
 *
 * DON'T USE THIS UNLESS YOU'RE ALREADY USING IT.
 *
 * This functions creates the following sysfs symlinks.
 *
 * - from "slaves" directory of the holder @disk to the claimed @bdev
 * - from "holders" directory of the @bdev to the holder @disk
 *
 * For example, if /dev/dm-0 maps to /dev/sda and disk for dm-0 is
 * passed to bd_link_disk_holder(), then:
 *
 *   /sys/block/dm-0/slaves/sda --> /sys/block/sda
 *   /sys/block/sda/holders/dm-0 --> /sys/block/dm-0
 *
 * The caller must have claimed @bdev before calling this function and
 * ensure that both @bdev and @disk are valid during the creation and
 * lifetime of these symlinks.
 *
 * CONTEXT:
 * Might sleep.
 *
 * RETURNS:
 * 0 on success, -errno on failure.
 */
int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk)
{
	struct bd_holder_disk *holder;
	int ret = 0;

	mutex_lock(&bdev->bd_mutex);

	WARN_ON_ONCE(!bdev->bd_holder);

	/* FIXME: remove the following once add_disk() handles errors */
	if (WARN_ON(!disk->slave_dir || !bdev->bd_part->holder_dir))
		goto out_unlock;

	holder = bd_find_holder_disk(bdev, disk);
	if (holder) {
		holder->refcnt++;
		goto out_unlock;
	}

	holder = kzalloc(sizeof(*holder), GFP_KERNEL);
	if (!holder) {
		ret = -ENOMEM;
		goto out_unlock;
	}

	INIT_LIST_HEAD(&holder->list);
	holder->disk = disk;
	holder->refcnt = 1;

	ret = add_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj);
	if (ret)
		goto out_free;

	ret = add_symlink(bdev->bd_part->holder_dir, &disk_to_dev(disk)->kobj);
	if (ret)
		goto out_del;
	/*
	 * bdev could be deleted beneath us which would implicitly destroy
	 * the holder directory.  Hold on to it.
	 */
	kobject_get(bdev->bd_part->holder_dir);

	list_add(&holder->list, &bdev->bd_holder_disks);
	goto out_unlock;

out_del:
	del_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj);
out_free:
	kfree(holder);
out_unlock:
	mutex_unlock(&bdev->bd_mutex);
	return ret;
}
EXPORT_SYMBOL_GPL(bd_link_disk_holder);

/**
 * bd_unlink_disk_holder - destroy symlinks created by bd_link_disk_holder()
 * @bdev: the claimed slave bdev
 * @disk: the holding disk
 *
 * DON'T USE THIS UNLESS YOU'RE ALREADY USING IT.
 *
 * CONTEXT:
 * Might sleep.
 */
void bd_unlink_disk_holder(struct block_device *bdev, struct gendisk *disk)
{
	struct bd_holder_disk *holder;

	mutex_lock(&bdev->bd_mutex);

	holder = bd_find_holder_disk(bdev, disk);

	if (!WARN_ON_ONCE(holder == NULL) && !--holder->refcnt) {
		del_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj);
		del_symlink(bdev->bd_part->holder_dir,
			    &disk_to_dev(disk)->kobj);
		kobject_put(bdev->bd_part->holder_dir);
		list_del_init(&holder->list);
		kfree(holder);
	}

	mutex_unlock(&bdev->bd_mutex);
}
EXPORT_SYMBOL_GPL(bd_unlink_disk_holder);
#endif

/**
 * flush_disk - invalidates all buffer-cache entries on a disk
 *
 * @bdev:      struct block device to be flushed
 * @kill_dirty: flag to guide handling of dirty inodes
 *
 * Invalidates all buffer-cache entries on a disk. It should be called
 * when a disk has been changed -- either by a media change or online
 * resize.
 */
static void flush_disk(struct block_device *bdev, bool kill_dirty)
{
	if (__invalidate_device(bdev, kill_dirty)) {
		printk(KERN_WARNING "VFS: busy inodes on changed media or "
		       "resized disk %s\n",
		       bdev->bd_disk ? bdev->bd_disk->disk_name : "");
	}

	if (!bdev->bd_disk)
		return;
	if (disk_part_scan_enabled(bdev->bd_disk))
		bdev->bd_invalidated = 1;
}

/**
 * check_disk_size_change - checks for a disk size change and adjusts bdev size.
 * @disk: struct gendisk to check
 * @bdev: struct bdev to adjust.
 *
 * This routine checks to see if the bdev size does not match the disk size
 * and adjusts it if it differs.
 */
void check_disk_size_change(struct gendisk *disk, struct block_device *bdev)
{
	loff_t disk_size, bdev_size;

	disk_size = (loff_t)get_capacity(disk) << 9;
	bdev_size = i_size_read(bdev->bd_inode);
	if (disk_size != bdev_size) {
		printk(KERN_INFO
		       "%s: detected capacity change from %lld to %lld\n",
		       disk->disk_name, bdev_size, disk_size);
		i_size_write(bdev->bd_inode, disk_size);
		flush_disk(bdev, false);
	}
}
EXPORT_SYMBOL(check_disk_size_change);

/**
 * revalidate_disk - wrapper for lower-level driver's revalidate_disk call-back
 * @disk: struct gendisk to be revalidated
 *
 * This routine is a wrapper for lower-level driver's revalidate_disk
 * call-backs.  It is used to do common pre and post operations needed
 * for all revalidate_disk operations.
 */
int revalidate_disk(struct gendisk *disk)
{
	struct block_device *bdev;
	int ret = 0;

	if (disk->fops->revalidate_disk)
		ret = disk->fops->revalidate_disk(disk);
	bdev = bdget_disk(disk, 0);
	if (!bdev)
		return ret;

	mutex_lock(&bdev->bd_mutex);
	check_disk_size_change(disk, bdev);
	bdev->bd_invalidated = 0;
	mutex_unlock(&bdev->bd_mutex);
	bdput(bdev);
	return ret;
}
EXPORT_SYMBOL(revalidate_disk);

/*
 * This routine checks whether a removable media has been changed,
 * and invalidates all buffer-cache-entries in that case. This
 * is a relatively slow routine, so we have to try to minimize using
 * it. Thus it is called only upon a 'mount' or 'open'. This
 * is the best way of combining speed and utility, I think.
 * People changing diskettes in the middle of an operation deserve
 * to lose :-)
 */
int check_disk_change(struct block_device *bdev)
{
	struct gendisk *disk = bdev->bd_disk;
	const struct block_device_operations *bdops = disk->fops;
	unsigned int events;

	events = disk_clear_events(disk, DISK_EVENT_MEDIA_CHANGE |
				   DISK_EVENT_EJECT_REQUEST);
	if (!(events & DISK_EVENT_MEDIA_CHANGE))
		return 0;

	flush_disk(bdev, true);
	if (bdops->revalidate_disk)
		bdops->revalidate_disk(bdev->bd_disk);
	return 1;
}

EXPORT_SYMBOL(check_disk_change);

void bd_set_size(struct block_device *bdev, loff_t size)
{
	unsigned bsize = bdev_logical_block_size(bdev);

	inode_lock(bdev->bd_inode);
	i_size_write(bdev->bd_inode, size);
	inode_unlock(bdev->bd_inode);
	while (bsize < PAGE_SIZE) {
		if (size & bsize)
			break;
		bsize <<= 1;
	}
	bdev->bd_block_size = bsize;
	bdev->bd_inode->i_blkbits = blksize_bits(bsize);
}
EXPORT_SYMBOL(bd_set_size);

static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part);

/*
 * bd_mutex locking:
 *
 *  mutex_lock(part->bd_mutex)
 *    mutex_lock_nested(whole->bd_mutex, 1)
 */
static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
{
	struct gendisk *disk;
	struct module *owner;
	int ret;
	int partno;
	int perm = 0;

	if (mode & FMODE_READ)
		perm |= MAY_READ;
	if (mode & FMODE_WRITE)
		perm |= MAY_WRITE;
	/*
	 * hooks: /n/, see "layering violations".
	 */
	if (!for_part) {
		ret = devcgroup_inode_permission(bdev->bd_inode, perm);
		if (ret != 0) {
			bdput(bdev);
			return ret;
		}
	}

 restart:

	ret = -ENXIO;
	disk = get_gendisk(bdev->bd_dev, &partno);
	if (!disk)
		goto out;
	owner = disk->fops->owner;

	disk_block_events(disk);
	mutex_lock_nested(&bdev->bd_mutex, for_part);
	if (!bdev->bd_openers) {
		bdev->bd_disk = disk;
		bdev->bd_queue = disk->queue;
		bdev->bd_contains = bdev;
		bdev->bd_partno = partno;

		if (!partno) {
			ret = -ENXIO;
			bdev->bd_part = disk_get_part(disk, partno);
			if (!bdev->bd_part)
				goto out_clear;

			ret = 0;
			if (disk->fops->open) {
				ret = disk->fops->open(bdev, mode);
				if (ret == -ERESTARTSYS) {
					/* Lost a race with 'disk' being
					 * deleted, try again.
					 * See md.c
					 */
					disk_put_part(bdev->bd_part);
					bdev->bd_part = NULL;
					bdev->bd_disk = NULL;
					bdev->bd_queue = NULL;
					mutex_unlock(&bdev->bd_mutex);
					disk_unblock_events(disk);
					put_disk(disk);
					module_put(owner);
					goto restart;
				}
			}

			if (!ret)
				bd_set_size(bdev,(loff_t)get_capacity(disk)<<9);

			/*
			 * If the device is invalidated, rescan partition
			 * if open succeeded or failed with -ENOMEDIUM.
			 * The latter is necessary to prevent ghost
			 * partitions on a removed medium.
			 */
			if (bdev->bd_invalidated) {
				if (!ret)
					rescan_partitions(disk, bdev);
				else if (ret == -ENOMEDIUM)
					invalidate_partitions(disk, bdev);
			}

			if (ret)
				goto out_clear;
		} else {
			struct block_device *whole;
			whole = bdget_disk(disk, 0);
			ret = -ENOMEM;
			if (!whole)
				goto out_clear;
			BUG_ON(for_part);
			ret = __blkdev_get(whole, mode, 1);
			if (ret)
				goto out_clear;
			bdev->bd_contains = whole;
			bdev->bd_part = disk_get_part(disk, partno);
			if (!(disk->flags & GENHD_FL_UP) ||
			    !bdev->bd_part || !bdev->bd_part->nr_sects) {
				ret = -ENXIO;
				goto out_clear;
			}
			bd_set_size(bdev, (loff_t)bdev->bd_part->nr_sects << 9);
		}

		if (bdev->bd_bdi == &noop_backing_dev_info)
			bdev->bd_bdi = bdi_get(disk->queue->backing_dev_info);
	} else {
		if (bdev->bd_contains == bdev) {
			ret = 0;
			if (bdev->bd_disk->fops->open)
				ret = bdev->bd_disk->fops->open(bdev, mode);
			/* the same as first opener case, read comment there */
			if (bdev->bd_invalidated) {
				if (!ret)
					rescan_partitions(bdev->bd_disk, bdev);
				else if (ret == -ENOMEDIUM)
					invalidate_partitions(bdev->bd_disk, bdev);
			}
			if (ret)
				goto out_unlock_bdev;
		}
		/* the first opener already holds the disk and module refs */
		put_disk(disk);
		module_put(owner);
	}
	bdev->bd_openers++;
	if (for_part)
		bdev->bd_part_count++;
	mutex_unlock(&bdev->bd_mutex);
	disk_unblock_events(disk);
	return 0;

 out_clear:
	disk_put_part(bdev->bd_part);
	bdev->bd_disk = NULL;
	bdev->bd_part = NULL;
	bdev->bd_queue = NULL;
	if (bdev != bdev->bd_contains)
		__blkdev_put(bdev->bd_contains, mode, 1);
	bdev->bd_contains = NULL;
 out_unlock_bdev:
	mutex_unlock(&bdev->bd_mutex);
	disk_unblock_events(disk);
	put_disk(disk);
	module_put(owner);
 out:
	bdput(bdev);

	return ret;
}

/**
 * blkdev_get - open a block device
 * @bdev: block_device to open
 * @mode: FMODE_* mask
 * @holder: exclusive holder identifier
 *
 * Open @bdev with @mode.  If @mode includes %FMODE_EXCL, @bdev is
 * open with exclusive access.  Specifying %FMODE_EXCL with %NULL
 * @holder is invalid.  Exclusive opens may nest for the same @holder.
 *
 * On success, the reference count of @bdev is unchanged.  On failure,
 * @bdev is put.
 *
 * CONTEXT:
 * Might sleep.
 *
 * RETURNS:
 * 0 on success, -errno on failure.
 */
int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder)
{
	struct block_device *whole = NULL;
	int res;

	WARN_ON_ONCE((mode & FMODE_EXCL) && !holder);

	if ((mode & FMODE_EXCL) && holder) {
		whole = bd_start_claiming(bdev, holder);
		if (IS_ERR(whole)) {
			bdput(bdev);
			return PTR_ERR(whole);
		}
	}

	res = __blkdev_get(bdev, mode, 0);

	if (whole) {
		struct gendisk *disk = whole->bd_disk;

		/* finish claiming */
		mutex_lock(&bdev->bd_mutex);
		spin_lock(&bdev_lock);

		if (!res) {
			BUG_ON(!bd_may_claim(bdev, whole, holder));
			/*
			 * Note that for a whole device bd_holders
			 * will be incremented twice, and bd_holder
			 * will be set to bd_may_claim before being
			 * set to holder
			 */
			whole->bd_holders++;
			whole->bd_holder = bd_may_claim;
			bdev->bd_holders++;
			bdev->bd_holder = holder;
		}

		/* tell others that we're done */
		BUG_ON(whole->bd_claiming != holder);
		whole->bd_claiming = NULL;
		wake_up_bit(&whole->bd_claiming, 0);

		spin_unlock(&bdev_lock);

		/*
		 * Block event polling for write claims if requested.  Any
		 * write holder makes the write_holder state stick until
		 * all are released.  This is good enough and tracking
		 * individual writeable reference is too fragile given the
		 * way @mode is used in blkdev_get/put().
		 */
		if (!res && (mode & FMODE_WRITE) && !bdev->bd_write_holder &&
		    (disk->flags & GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE)) {
			bdev->bd_write_holder = true;
			disk_block_events(disk);
		}

		mutex_unlock(&bdev->bd_mutex);
		bdput(whole);
	}

	return res;
}
EXPORT_SYMBOL(blkdev_get);

/**
 * blkdev_get_by_path - open a block device by name
 * @path: path to the block device to open
 * @mode: FMODE_* mask
 * @holder: exclusive holder identifier
 *
 * Open the blockdevice described by the device file at @path.  @mode
 * and @holder are identical to blkdev_get().
 *
 * On success, the returned block_device has reference count of one.
 *
 * CONTEXT:
 * Might sleep.
 *
 * RETURNS:
 * Pointer to block_device on success, ERR_PTR(-errno) on failure.
 */
struct block_device *blkdev_get_by_path(const char *path, fmode_t mode,
					void *holder)
{
	struct block_device *bdev;
	int err;

	bdev = lookup_bdev(path);
	if (IS_ERR(bdev))
		return bdev;

	err = blkdev_get(bdev, mode, holder);
	if (err)
		return ERR_PTR(err);

	if ((mode & FMODE_WRITE) && bdev_read_only(bdev)) {
		blkdev_put(bdev, mode);
		return ERR_PTR(-EACCES);
	}

	return bdev;
}
EXPORT_SYMBOL(blkdev_get_by_path);

/**
 * blkdev_get_by_dev - open a block device by device number
 * @dev: device number of block device to open
 * @mode: FMODE_* mask
 * @holder: exclusive holder identifier
 *
 * Open the blockdevice described by device number @dev.  @mode and
 * @holder are identical to blkdev_get().
 *
 * Use it ONLY if you really do not have anything better - i.e. when
 * you are behind a truly sucky interface and all you are given is a
 * device number.  _Never_ to be used for internal purposes.  If you
 * ever need it - reconsider your API.
 *
 * On success, the returned block_device has reference count of one.
 *
 * CONTEXT:
 * Might sleep.
 *
 * RETURNS:
 * Pointer to block_device on success, ERR_PTR(-errno) on failure.
 */
struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder)
{
	struct block_device *bdev;
	int err;

	bdev = bdget(dev);
	if (!bdev)
		return ERR_PTR(-ENOMEM);

	err = blkdev_get(bdev, mode, holder);
	if (err)
		return ERR_PTR(err);

	return bdev;
}
EXPORT_SYMBOL(blkdev_get_by_dev);

static int blkdev_open(struct inode * inode, struct file * filp)
{
	struct block_device *bdev;

	/*
	 * Preserve backwards compatibility and allow large file access
	 * even if userspace doesn't ask for it explicitly. Some mkfs
	 * binary needs it. We might want to drop this workaround
	 * during an unstable branch.
	 */
	filp->f_flags |= O_LARGEFILE;

	filp->f_mode |= FMODE_NOWAIT;

	if (filp->f_flags & O_NDELAY)
		filp->f_mode |= FMODE_NDELAY;
	if (filp->f_flags & O_EXCL)
		filp->f_mode |= FMODE_EXCL;
	if ((filp->f_flags & O_ACCMODE) == 3)
		filp->f_mode |= FMODE_WRITE_IOCTL;

	bdev = bd_acquire(inode);
	if (bdev == NULL)
		return -ENOMEM;

	filp->f_mapping = bdev->bd_inode->i_mapping;
	filp->f_wb_err = filemap_sample_wb_err(filp->f_mapping);

	return blkdev_get(bdev, filp->f_mode, filp);
}

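/*
 * Drop one opener reference.  When the last opener goes away, flush and
 * kill the page cache, write back the inode and release the disk; a
 * partition also drops the reference it holds on the whole device.
 */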
static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part)
{
	struct gendisk *disk = bdev->bd_disk;
	struct block_device *victim = NULL;

	mutex_lock_nested(&bdev->bd_mutex, for_part);
	if (for_part)
		bdev->bd_part_count--;

	if (!--bdev->bd_openers) {
		WARN_ON_ONCE(bdev->bd_holders);
		sync_blockdev(bdev);
		kill_bdev(bdev);

		bdev_write_inode(bdev);
	}
	if (bdev->bd_contains == bdev) {
		if (disk->fops->release)
			disk->fops->release(disk, mode);
	}
	if (!bdev->bd_openers) {
		struct module *owner = disk->fops->owner;

		disk_put_part(bdev->bd_part);
		bdev->bd_part = NULL;
		bdev->bd_disk = NULL;
		if (bdev != bdev->bd_contains)
			victim = bdev->bd_contains;
		bdev->bd_contains = NULL;

		put_disk(disk);
		module_put(owner);
	}
	mutex_unlock(&bdev->bd_mutex);
	bdput(bdev);
	if (victim)
		__blkdev_put(victim, mode, 1);
}

void blkdev_put(struct block_device *bdev, fmode_t mode)
{
	mutex_lock(&bdev->bd_mutex);

	if (mode & FMODE_EXCL) {
		bool bdev_free;

		/*
		 * Release a claim on the device.  The holder fields
		 * are protected with bdev_lock.  bd_mutex is to
		 * synchronize disk_holder unlinking.
		 */
		spin_lock(&bdev_lock);

		WARN_ON_ONCE(--bdev->bd_holders < 0);
		WARN_ON_ONCE(--bdev->bd_contains->bd_holders < 0);

		/* bd_contains might point to self, check in a separate step */
		if ((bdev_free = !bdev->bd_holders))
			bdev->bd_holder = NULL;
		if (!bdev->bd_contains->bd_holders)
			bdev->bd_contains->bd_holder = NULL;

		spin_unlock(&bdev_lock);

		/*
		 * If this was the last claim, remove holder link and
		 * unblock event polling if it was a write holder.
		 */
		if (bdev_free && bdev->bd_write_holder) {
			disk_unblock_events(bdev->bd_disk);
			bdev->bd_write_holder = false;
		}
	}

	/*
	 * Trigger event checking and tell drivers to flush MEDIA_CHANGE
	 * event.  This is to ensure detection of media removal commanded
	 * from userland - e.g. eject(1).
	 */
	disk_flush_events(bdev->bd_disk, DISK_EVENT_MEDIA_CHANGE);

	mutex_unlock(&bdev->bd_mutex);

	__blkdev_put(bdev, mode, 0);
}
EXPORT_SYMBOL(blkdev_put);

static int blkdev_close(struct inode * inode, struct file * filp)
{
	struct block_device *bdev = I_BDEV(bdev_file_inode(filp));
	blkdev_put(bdev, filp->f_mode);
	return 0;
}

static long block_ioctl(struct file *file, unsigned cmd, unsigned long arg)
{
	struct block_device *bdev = I_BDEV(bdev_file_inode(file));
	fmode_t mode = file->f_mode;

	/*
	 * O_NDELAY can be altered using fcntl(.., F_SETFL, ..), so we have
	 * to update it before every ioctl.
	 */
	if (file->f_flags & O_NDELAY)
		mode |= FMODE_NDELAY;
	else
		mode &= ~FMODE_NDELAY;

	return blkdev_ioctl(bdev, mode, cmd, arg);
}

/*
 * Write data to the block device.  Only intended for the block device itself
 * and the raw driver which basically is a fake block device.
 *
 * Does not take i_mutex for the write and thus is not for general purpose
 * use.
 */
ssize_t blkdev_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
	struct file *file = iocb->ki_filp;
	struct inode *bd_inode = bdev_file_inode(file);
	loff_t size = i_size_read(bd_inode);
	struct blk_plug plug;
	ssize_t ret;

	if (bdev_read_only(I_BDEV(bd_inode)))
		return -EPERM;

	if (!iov_iter_count(from))
		return 0;

	if (iocb->ki_pos >= size)
		return -ENOSPC;

	if ((iocb->ki_flags & (IOCB_NOWAIT | IOCB_DIRECT)) == IOCB_NOWAIT)
		return -EOPNOTSUPP;

	iov_iter_truncate(from, size - iocb->ki_pos);

	blk_start_plug(&plug);
	ret = __generic_file_write_iter(iocb, from);
	if (ret > 0)
		ret = generic_write_sync(iocb, ret);
	blk_finish_plug(&plug);
	return ret;
}
EXPORT_SYMBOL_GPL(blkdev_write_iter);

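/* Read from the block device, clamping the iter to the device size. */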
ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
	struct file *file = iocb->ki_filp;
	struct inode *bd_inode = bdev_file_inode(file);
	loff_t size = i_size_read(bd_inode);
	loff_t pos = iocb->ki_pos;

	if (pos >= size)
		return 0;

	size -= pos;
	iov_iter_truncate(to, size);
	return generic_file_read_iter(iocb, to);
}
EXPORT_SYMBOL_GPL(blkdev_read_iter);

/*
 * Try to release a page associated with block device when the system
 * is under memory pressure.
 */
static int blkdev_releasepage(struct page *page, gfp_t wait)
{
	struct super_block *super = BDEV_I(page->mapping->host)->bdev.bd_super;

	if (super && super->s_op->bdev_try_to_free_page)
		return super->s_op->bdev_try_to_free_page(super, page, wait);

	return try_to_free_buffers(page);
}

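/*
 * DAX mappings must be written back through the DAX path so that dirty
 * radix-tree entries get flushed; everything else takes the generic
 * writepages path.
 */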
static int blkdev_writepages(struct address_space *mapping,
			     struct writeback_control *wbc)
{
	if (dax_mapping(mapping)) {
		struct block_device *bdev = I_BDEV(mapping->host);

		return dax_writeback_mapping_range(mapping, bdev, wbc);
	}
	return generic_writepages(mapping, wbc);
}

static const struct address_space_operations def_blk_aops = {
	.readpage	= blkdev_readpage,
	.readpages	= blkdev_readpages,
	.writepage	= blkdev_writepage,
	.write_begin	= blkdev_write_begin,
	.write_end	= blkdev_write_end,
	.writepages	= blkdev_writepages,
	.releasepage	= blkdev_releasepage,
	.direct_IO	= blkdev_direct_IO,
	.is_dirty_writeback = buffer_check_dirty_writeback,
};

#define	BLKDEV_FALLOC_FL_SUPPORTED					\
		(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE |		\
		 FALLOC_FL_ZERO_RANGE | FALLOC_FL_NO_HIDE_STALE)

static long blkdev_fallocate(struct file *file, int mode, loff_t start,
			     loff_t len)
{
	struct block_device *bdev = I_BDEV(bdev_file_inode(file));
	struct address_space *mapping;
	loff_t end = start + len - 1;
	loff_t isize;
	int error;

	/* Fail if we don't recognize the flags. */
	if (mode & ~BLKDEV_FALLOC_FL_SUPPORTED)
		return -EOPNOTSUPP;

	/* Don't go off the end of the device. */
	isize = i_size_read(bdev->bd_inode);
	if (start >= isize)
		return -EINVAL;
	if (end >= isize) {
		if (mode & FALLOC_FL_KEEP_SIZE) {
			len = isize - start;
			end = start + len - 1;
		} else
			return -EINVAL;
	}

	/*
	 * Don't allow IO that isn't aligned to logical block size.
	 */
	if ((start | len) & (bdev_logical_block_size(bdev) - 1))
		return -EINVAL;

	/* Invalidate the page cache, including dirty pages. */
	mapping = bdev->bd_inode->i_mapping;
	truncate_inode_pages_range(mapping, start, end);

	switch (mode) {
	case FALLOC_FL_ZERO_RANGE:
	case FALLOC_FL_ZERO_RANGE | FALLOC_FL_KEEP_SIZE:
		error = blkdev_issue_zeroout(bdev, start >> 9, len >> 9,
					     GFP_KERNEL, BLKDEV_ZERO_NOUNMAP);
		break;
	case FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE:
		error = blkdev_issue_zeroout(bdev, start >> 9, len >> 9,
					     GFP_KERNEL, BLKDEV_ZERO_NOFALLBACK);
		break;
	case FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE | FALLOC_FL_NO_HIDE_STALE:
		error = blkdev_issue_discard(bdev, start >> 9, len >> 9,
					     GFP_KERNEL, 0);
		break;
	default:
		return -EOPNOTSUPP;
	}
	if (error)
		return error;

	/*
	 * Invalidate again; if someone wandered in and dirtied a page,
	 * the caller will be given -EBUSY.  The third argument is
	 * inclusive, so the rounding here is safe.
	 */
	return invalidate_inode_pages2_range(mapping,
					     start >> PAGE_SHIFT,
					     end >> PAGE_SHIFT);
}

const struct file_operations def_blk_fops = {
	.open		= blkdev_open,
	.release	= blkdev_close,
	.llseek		= block_llseek,
	.read_iter	= blkdev_read_iter,
	.write_iter	= blkdev_write_iter,
	.mmap		= generic_file_mmap,
	.fsync		= blkdev_fsync,
	.unlocked_ioctl	= block_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl	= compat_blkdev_ioctl,
#endif
	.splice_read	= generic_file_splice_read,
	.splice_write	= iter_file_splice_write,
	.fallocate	= blkdev_fallocate,
};

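/*
 * Issue a block-device ioctl from kernel context: the address limit is
 * temporarily widened with set_fs(KERNEL_DS) so the ioctl handlers may be
 * passed kernel pointers.
 */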
int ioctl_by_bdev(struct block_device *bdev, unsigned cmd, unsigned long arg)
{
	int res;
	mm_segment_t old_fs = get_fs();
	set_fs(KERNEL_DS);
	res = blkdev_ioctl(bdev, 0, cmd, arg);
	set_fs(old_fs);
	return res;
}

EXPORT_SYMBOL(ioctl_by_bdev);

/**
 * lookup_bdev  - lookup a struct block_device by name
 * @pathname:	special file representing the block device
 *
 * Get a reference to the blockdevice at @pathname in the current
 * namespace if possible and return it.  Return ERR_PTR(error)
 * otherwise.
 */
struct block_device *lookup_bdev(const char *pathname)
{
	struct block_device *bdev;
	struct inode *inode;
	struct path path;
	int error;

	if (!pathname || !*pathname)
		return ERR_PTR(-EINVAL);

	error = kern_path(pathname, LOOKUP_FOLLOW, &path);
	if (error)
		return ERR_PTR(error);

	inode = d_backing_inode(path.dentry);
	error = -ENOTBLK;
	if (!S_ISBLK(inode->i_mode))
		goto fail;
	error = -EACCES;
	if (!may_open_dev(&path))
		goto fail;
	error = -ENOMEM;
	bdev = bd_acquire(inode);
	if (!bdev)
		goto fail;
out:
	path_put(&path);
	return bdev;
fail:
	bdev = ERR_PTR(error);
	goto out;
}
EXPORT_SYMBOL(lookup_bdev);

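/*
 * Invalidate all inodes, dentries and page cache backed by the device.
 * Returns non-zero if any inodes were busy.
 */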
int __invalidate_device(struct block_device *bdev, bool kill_dirty)
{
	struct super_block *sb = get_super(bdev);
	int res = 0;

	if (sb) {
		/*
		 * no need to lock the super, get_super holds the
		 * read mutex so the filesystem cannot go away
		 * under us (->put_super runs with the write lock
		 * hold).
		 */
		shrink_dcache_sb(sb);
		res = invalidate_inodes(sb, kill_dirty);
		drop_super(sb);
	}
	invalidate_bdev(bdev);
	return res;
}
EXPORT_SYMBOL(__invalidate_device);

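/* Call @func on every currently open block device. */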
void iterate_bdevs(void (*func)(struct block_device *, void *), void *arg)
{
	struct inode *inode, *old_inode = NULL;

	spin_lock(&blockdev_superblock->s_inode_list_lock);
	list_for_each_entry(inode, &blockdev_superblock->s_inodes, i_sb_list) {
		struct address_space *mapping = inode->i_mapping;
		struct block_device *bdev;

		spin_lock(&inode->i_lock);
		if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW) ||
		    mapping->nrpages == 0) {
			spin_unlock(&inode->i_lock);
			continue;
		}
		__iget(inode);
		spin_unlock(&inode->i_lock);
		spin_unlock(&blockdev_superblock->s_inode_list_lock);
		/*
		 * We hold a reference to 'inode' so it couldn't have been
		 * removed from s_inodes list while we dropped the
		 * s_inode_list_lock.  We cannot iput the inode now as we can
		 * be holding the last reference and we cannot iput it under
		 * s_inode_list_lock. So we keep the reference and iput it
		 * later.
		 */
		iput(old_inode);
		old_inode = inode;
		bdev = I_BDEV(inode);

		mutex_lock(&bdev->bd_mutex);
		if (bdev->bd_openers)
			func(bdev, arg);
		mutex_unlock(&bdev->bd_mutex);

		spin_lock(&blockdev_superblock->s_inode_list_lock);
	}
	spin_unlock(&blockdev_superblock->s_inode_list_lock);
	iput(old_inode);
}