// SPDX-License-Identifier: GPL-2.0-only
/*
 *  linux/fs/block_dev.c
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 *  Copyright (C) 2001  Andrea Arcangeli <andrea@suse.de> SuSE
 */
8#include <linux/init.h>
9#include <linux/mm.h>
10#include <linux/fcntl.h>
11#include <linux/slab.h>
12#include <linux/kmod.h>
13#include <linux/major.h>
14#include <linux/device_cgroup.h>
15#include <linux/highmem.h>
16#include <linux/blkdev.h>
17#include <linux/backing-dev.h>
18#include <linux/module.h>
19#include <linux/blkpg.h>
20#include <linux/magic.h>
21#include <linux/dax.h>
22#include <linux/buffer_head.h>
23#include <linux/swap.h>
24#include <linux/pagevec.h>
25#include <linux/writeback.h>
26#include <linux/mpage.h>
27#include <linux/mount.h>
28#include <linux/uio.h>
29#include <linux/namei.h>
30#include <linux/log2.h>
31#include <linux/cleancache.h>
32#include <linux/dax.h>
33#include <linux/badblocks.h>
34#include <linux/task_io_accounting_ops.h>
35#include <linux/falloc.h>
36#include <linux/uaccess.h>
37#include "internal.h"
38
39struct bdev_inode {
40 struct block_device bdev;
41 struct inode vfs_inode;
42};
43
44static const struct address_space_operations def_blk_aops;
45
46static inline struct bdev_inode *BDEV_I(struct inode *inode)
47{
48 return container_of(inode, struct bdev_inode, vfs_inode);
49}
50
51struct block_device *I_BDEV(struct inode *inode)
52{
53 return &BDEV_I(inode)->bdev;
54}
55EXPORT_SYMBOL(I_BDEV);
56
57static void bdev_write_inode(struct block_device *bdev)
58{
59 struct inode *inode = bdev->bd_inode;
60 int ret;
61
62 spin_lock(&inode->i_lock);
63 while (inode->i_state & I_DIRTY) {
64 spin_unlock(&inode->i_lock);
65 ret = write_inode_now(inode, true);
66 if (ret) {
67 char name[BDEVNAME_SIZE];
68 pr_warn_ratelimited("VFS: Dirty inode writeback failed "
69 "for block device %s (err=%d).\n",
70 bdevname(bdev, name), ret);
71 }
72 spin_lock(&inode->i_lock);
73 }
74 spin_unlock(&inode->i_lock);
75}
76
/* Kill _all_ buffers and pagecache, dirty or not. */
78void kill_bdev(struct block_device *bdev)
79{
80 struct address_space *mapping = bdev->bd_inode->i_mapping;
81
82 if (mapping->nrpages == 0 && mapping->nrexceptional == 0)
83 return;
84
85 invalidate_bh_lrus();
86 truncate_inode_pages(mapping, 0);
87}
88EXPORT_SYMBOL(kill_bdev);
89
/* Invalidate clean unused buffers and pagecache. */
91void invalidate_bdev(struct block_device *bdev)
92{
93 struct address_space *mapping = bdev->bd_inode->i_mapping;
94
95 if (mapping->nrpages) {
96 invalidate_bh_lrus();
97 lru_add_drain_all();
98 invalidate_mapping_pages(mapping, 0, -1);
99 }
	/* 99% of the time, we don't need to flush the cleancache on the bdev.
	 * But, for the strange corners, let's be cautious.
	 */
103 cleancache_invalidate_inode(mapping);
104}
105EXPORT_SYMBOL(invalidate_bdev);
106
107static void set_init_blocksize(struct block_device *bdev)
108{
109 unsigned bsize = bdev_logical_block_size(bdev);
110 loff_t size = i_size_read(bdev->bd_inode);
111
112 while (bsize < PAGE_SIZE) {
113 if (size & bsize)
114 break;
115 bsize <<= 1;
116 }
117 bdev->bd_block_size = bsize;
118 bdev->bd_inode->i_blkbits = blksize_bits(bsize);
119}
120
121int set_blocksize(struct block_device *bdev, int size)
122{
	/* Size must be a power of two, and between 512 and PAGE_SIZE */
124 if (size > PAGE_SIZE || size < 512 || !is_power_of_2(size))
125 return -EINVAL;
126
	/* Size cannot be smaller than the size supported by the device */
128 if (size < bdev_logical_block_size(bdev))
129 return -EINVAL;
130
	/* Don't change the size if it is the same as the current size */
132 if (bdev->bd_block_size != size) {
133 sync_blockdev(bdev);
134 bdev->bd_block_size = size;
135 bdev->bd_inode->i_blkbits = blksize_bits(size);
136 kill_bdev(bdev);
137 }
138 return 0;
139}
140
141EXPORT_SYMBOL(set_blocksize);
142
143int sb_set_blocksize(struct super_block *sb, int size)
144{
145 if (set_blocksize(sb->s_bdev, size))
146 return 0;
	/* If we get here, we know size is a power of two
	 * and its value is between 512 and PAGE_SIZE */
149 sb->s_blocksize = size;
150 sb->s_blocksize_bits = blksize_bits(size);
151 return sb->s_blocksize;
152}
153
154EXPORT_SYMBOL(sb_set_blocksize);
155
156int sb_min_blocksize(struct super_block *sb, int size)
157{
158 int minsize = bdev_logical_block_size(sb->s_bdev);
159 if (size < minsize)
160 size = minsize;
161 return sb_set_blocksize(sb, size);
162}
163
164EXPORT_SYMBOL(sb_min_blocksize);
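/*
 * Illustrative use only (not part of this file): a filesystem typically
 * picks its block size while filling its superblock, e.g.
 *
 *	if (!sb_min_blocksize(sb, 512))
 *		return -EINVAL;
 *	...
 *	sb_set_blocksize(sb, chosen_block_size);
 *
 * Both helpers return the resulting block size, or 0 on failure.
 */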
165
166static int
167blkdev_get_block(struct inode *inode, sector_t iblock,
168 struct buffer_head *bh, int create)
169{
170 bh->b_bdev = I_BDEV(inode);
171 bh->b_blocknr = iblock;
172 set_buffer_mapped(bh);
173 return 0;
174}
175
176static struct inode *bdev_file_inode(struct file *file)
177{
178 return file->f_mapping->host;
179}
180
181static unsigned int dio_bio_write_op(struct kiocb *iocb)
182{
183 unsigned int op = REQ_OP_WRITE | REQ_SYNC | REQ_IDLE;
184
	/* avoid the need for an I/O completion work item */
186 if (iocb->ki_flags & IOCB_DSYNC)
187 op |= REQ_FUA;
188 return op;
189}
190
191#define DIO_INLINE_BIO_VECS 4
192
193static void blkdev_bio_end_io_simple(struct bio *bio)
194{
195 struct task_struct *waiter = bio->bi_private;
196
197 WRITE_ONCE(bio->bi_private, NULL);
198 blk_wake_io_task(waiter);
199}
200
201static ssize_t
202__blkdev_direct_IO_simple(struct kiocb *iocb, struct iov_iter *iter,
203 int nr_pages)
204{
205 struct file *file = iocb->ki_filp;
206 struct block_device *bdev = I_BDEV(bdev_file_inode(file));
207 struct bio_vec inline_vecs[DIO_INLINE_BIO_VECS], *vecs, *bvec;
208 loff_t pos = iocb->ki_pos;
209 bool should_dirty = false;
210 struct bio bio;
211 ssize_t ret;
212 blk_qc_t qc;
	struct bvec_iter_all iter_all;
214
215 if ((pos | iov_iter_alignment(iter)) &
216 (bdev_logical_block_size(bdev) - 1))
217 return -EINVAL;
218
219 if (nr_pages <= DIO_INLINE_BIO_VECS)
220 vecs = inline_vecs;
221 else {
222 vecs = kmalloc_array(nr_pages, sizeof(struct bio_vec),
223 GFP_KERNEL);
224 if (!vecs)
225 return -ENOMEM;
226 }
227
228 bio_init(&bio, vecs, nr_pages);
229 bio_set_dev(&bio, bdev);
230 bio.bi_iter.bi_sector = pos >> 9;
231 bio.bi_write_hint = iocb->ki_hint;
232 bio.bi_private = current;
233 bio.bi_end_io = blkdev_bio_end_io_simple;
234 bio.bi_ioprio = iocb->ki_ioprio;
235
236 ret = bio_iov_iter_get_pages(&bio, iter);
237 if (unlikely(ret))
238 goto out;
239 ret = bio.bi_iter.bi_size;
240
241 if (iov_iter_rw(iter) == READ) {
242 bio.bi_opf = REQ_OP_READ;
243 if (iter_is_iovec(iter))
244 should_dirty = true;
245 } else {
246 bio.bi_opf = dio_bio_write_op(iocb);
247 task_io_account_write(ret);
248 }
249 if (iocb->ki_flags & IOCB_HIPRI)
250 bio_set_polled(&bio, iocb);
251
252 qc = submit_bio(&bio);
253 for (;;) {
254 set_current_state(TASK_UNINTERRUPTIBLE);
255 if (!READ_ONCE(bio.bi_private))
256 break;
257 if (!(iocb->ki_flags & IOCB_HIPRI) ||
258 !blk_poll(bdev_get_queue(bdev), qc, true))
259 io_schedule();
260 }
261 __set_current_state(TASK_RUNNING);
262
	bio_for_each_segment_all(bvec, &bio, iter_all) {
264 if (should_dirty && !PageCompound(bvec->bv_page))
265 set_page_dirty_lock(bvec->bv_page);
266 if (!bio_flagged(&bio, BIO_NO_PAGE_REF))
267 put_page(bvec->bv_page);
268 }
269
270 if (unlikely(bio.bi_status))
271 ret = blk_status_to_errno(bio.bi_status);
272
273out:
274 if (vecs != inline_vecs)
275 kfree(vecs);
276
277 bio_uninit(&bio);
278
279 return ret;
280}
281
282struct blkdev_dio {
283 union {
284 struct kiocb *iocb;
285 struct task_struct *waiter;
286 };
287 size_t size;
288 atomic_t ref;
289 bool multi_bio : 1;
290 bool should_dirty : 1;
291 bool is_sync : 1;
292 struct bio bio;
293};
294
295static struct bio_set blkdev_dio_pool;
296
297static int blkdev_iopoll(struct kiocb *kiocb, bool wait)
298{
299 struct block_device *bdev = I_BDEV(kiocb->ki_filp->f_mapping->host);
300 struct request_queue *q = bdev_get_queue(bdev);
301
302 return blk_poll(q, READ_ONCE(kiocb->ki_cookie), wait);
303}
304
305static void blkdev_bio_end_io(struct bio *bio)
306{
307 struct blkdev_dio *dio = bio->bi_private;
308 bool should_dirty = dio->should_dirty;
309
310 if (bio->bi_status && !dio->bio.bi_status)
311 dio->bio.bi_status = bio->bi_status;
312
313 if (!dio->multi_bio || atomic_dec_and_test(&dio->ref)) {
314 if (!dio->is_sync) {
315 struct kiocb *iocb = dio->iocb;
316 ssize_t ret;
317
318 if (likely(!dio->bio.bi_status)) {
319 ret = dio->size;
320 iocb->ki_pos += ret;
321 } else {
322 ret = blk_status_to_errno(dio->bio.bi_status);
323 }
324
325 dio->iocb->ki_complete(iocb, ret, 0);
326 if (dio->multi_bio)
327 bio_put(&dio->bio);
328 } else {
329 struct task_struct *waiter = dio->waiter;
330
331 WRITE_ONCE(dio->waiter, NULL);
332 blk_wake_io_task(waiter);
333 }
334 }
335
336 if (should_dirty) {
337 bio_check_pages_dirty(bio);
338 } else {
339 if (!bio_flagged(bio, BIO_NO_PAGE_REF)) {
340 struct bio_vec *bvec;
			struct bvec_iter_all iter_all;

			bio_for_each_segment_all(bvec, bio, iter_all)
344 put_page(bvec->bv_page);
345 }
346 bio_put(bio);
347 }
348}
349
350static ssize_t
351__blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages)
352{
353 struct file *file = iocb->ki_filp;
354 struct inode *inode = bdev_file_inode(file);
355 struct block_device *bdev = I_BDEV(inode);
356 struct blk_plug plug;
357 struct blkdev_dio *dio;
358 struct bio *bio;
359 bool is_poll = (iocb->ki_flags & IOCB_HIPRI) != 0;
360 bool is_read = (iov_iter_rw(iter) == READ), is_sync;
361 loff_t pos = iocb->ki_pos;
362 blk_qc_t qc = BLK_QC_T_NONE;
363 int ret = 0;
364
365 if ((pos | iov_iter_alignment(iter)) &
366 (bdev_logical_block_size(bdev) - 1))
367 return -EINVAL;
368
369 bio = bio_alloc_bioset(GFP_KERNEL, nr_pages, &blkdev_dio_pool);
370
371 dio = container_of(bio, struct blkdev_dio, bio);
372 dio->is_sync = is_sync = is_sync_kiocb(iocb);
373 if (dio->is_sync) {
374 dio->waiter = current;
375 bio_get(bio);
376 } else {
377 dio->iocb = iocb;
378 }
379
380 dio->size = 0;
381 dio->multi_bio = false;
382 dio->should_dirty = is_read && iter_is_iovec(iter);
383
	/*
	 * Don't plug for HIPRI/polled IO, as those should go straight
	 * to issue.
	 */
388 if (!is_poll)
389 blk_start_plug(&plug);
390
391 for (;;) {
392 bio_set_dev(bio, bdev);
393 bio->bi_iter.bi_sector = pos >> 9;
394 bio->bi_write_hint = iocb->ki_hint;
395 bio->bi_private = dio;
396 bio->bi_end_io = blkdev_bio_end_io;
397 bio->bi_ioprio = iocb->ki_ioprio;
398
399 ret = bio_iov_iter_get_pages(bio, iter);
400 if (unlikely(ret)) {
401 bio->bi_status = BLK_STS_IOERR;
402 bio_endio(bio);
403 break;
404 }
405
406 if (is_read) {
407 bio->bi_opf = REQ_OP_READ;
408 if (dio->should_dirty)
409 bio_set_pages_dirty(bio);
410 } else {
411 bio->bi_opf = dio_bio_write_op(iocb);
412 task_io_account_write(bio->bi_iter.bi_size);
413 }
414
415 dio->size += bio->bi_iter.bi_size;
416 pos += bio->bi_iter.bi_size;
417
418 nr_pages = iov_iter_npages(iter, BIO_MAX_PAGES);
419 if (!nr_pages) {
420 bool polled = false;
421
422 if (iocb->ki_flags & IOCB_HIPRI) {
423 bio_set_polled(bio, iocb);
424 polled = true;
425 }
426
427 qc = submit_bio(bio);
428
429 if (polled)
430 WRITE_ONCE(iocb->ki_cookie, qc);
431 break;
432 }
433
434 if (!dio->multi_bio) {
			/*
			 * AIO needs an extra reference to ensure the dio
			 * structure which is embedded into the first bio
			 * stays around.
			 */
440 if (!is_sync)
441 bio_get(bio);
442 dio->multi_bio = true;
443 atomic_set(&dio->ref, 2);
444 } else {
445 atomic_inc(&dio->ref);
446 }
447
448 submit_bio(bio);
449 bio = bio_alloc(GFP_KERNEL, nr_pages);
450 }
451
452 if (!is_poll)
453 blk_finish_plug(&plug);
454
455 if (!is_sync)
456 return -EIOCBQUEUED;
457
458 for (;;) {
459 set_current_state(TASK_UNINTERRUPTIBLE);
460 if (!READ_ONCE(dio->waiter))
461 break;
462
463 if (!(iocb->ki_flags & IOCB_HIPRI) ||
464 !blk_poll(bdev_get_queue(bdev), qc, true))
465 io_schedule();
466 }
467 __set_current_state(TASK_RUNNING);
468
469 if (!ret)
470 ret = blk_status_to_errno(dio->bio.bi_status);
471 if (likely(!ret))
472 ret = dio->size;
473
474 bio_put(&dio->bio);
475 return ret;
476}
477
478static ssize_t
479blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
480{
481 int nr_pages;
482
483 nr_pages = iov_iter_npages(iter, BIO_MAX_PAGES + 1);
484 if (!nr_pages)
485 return 0;
486 if (is_sync_kiocb(iocb) && nr_pages <= BIO_MAX_PAGES)
487 return __blkdev_direct_IO_simple(iocb, iter, nr_pages);
488
489 return __blkdev_direct_IO(iocb, iter, min(nr_pages, BIO_MAX_PAGES));
490}
491
492static __init int blkdev_init(void)
493{
	return bioset_init(&blkdev_dio_pool, 4,
				offsetof(struct blkdev_dio, bio),
				BIOSET_NEED_BVECS);
495}
496module_init(blkdev_init);
497
498int __sync_blockdev(struct block_device *bdev, int wait)
499{
500 if (!bdev)
501 return 0;
502 if (!wait)
503 return filemap_flush(bdev->bd_inode->i_mapping);
504 return filemap_write_and_wait(bdev->bd_inode->i_mapping);
505}
506
/*
 * Write out and wait upon all the dirty data associated with a block
 * device via its mapping.  Does not take the superblock lock.
 */
511int sync_blockdev(struct block_device *bdev)
512{
513 return __sync_blockdev(bdev, 1);
514}
515EXPORT_SYMBOL(sync_blockdev);
516
/*
 * Write out and wait upon all dirty data associated with this
 * device.  Filesystem data as well as the underlying block
 * device.  Takes the superblock lock.
 */
522int fsync_bdev(struct block_device *bdev)
523{
524 struct super_block *sb = get_super(bdev);
525 if (sb) {
526 int res = sync_filesystem(sb);
527 drop_super(sb);
528 return res;
529 }
530 return sync_blockdev(bdev);
531}
532EXPORT_SYMBOL(fsync_bdev);
533
/**
 * freeze_bdev  --  lock a filesystem and force it into a consistent state
 * @bdev:	blockdevice to lock
 *
 * If a superblock is found on this device, we take the s_umount semaphore
 * on it to make sure nobody unmounts until the snapshot creation is done.
 * The reference counter (bd_fsfreeze_count) guarantees that only the last
 * unfreeze process can unfreeze the frozen filesystem actually when multiple
 * freeze requests arrive simultaneously.  It counts up in freeze_bdev() and
 * counts down in thaw_bdev().  When it becomes 0, thaw_bdev() will unfreeze
 * for real.
 */
546struct super_block *freeze_bdev(struct block_device *bdev)
547{
548 struct super_block *sb;
549 int error = 0;
550
551 mutex_lock(&bdev->bd_fsfreeze_mutex);
552 if (++bdev->bd_fsfreeze_count > 1) {
		/*
		 * We don't even need to grab a reference - the first call
		 * to freeze_bdev grabs an active reference and only the last
		 * thaw_bdev drops it.
		 */
558 sb = get_super(bdev);
559 if (sb)
560 drop_super(sb);
561 mutex_unlock(&bdev->bd_fsfreeze_mutex);
562 return sb;
563 }
564
565 sb = get_active_super(bdev);
566 if (!sb)
567 goto out;
568 if (sb->s_op->freeze_super)
569 error = sb->s_op->freeze_super(sb);
570 else
571 error = freeze_super(sb);
572 if (error) {
573 deactivate_super(sb);
574 bdev->bd_fsfreeze_count--;
575 mutex_unlock(&bdev->bd_fsfreeze_mutex);
576 return ERR_PTR(error);
577 }
578 deactivate_super(sb);
579 out:
580 sync_blockdev(bdev);
581 mutex_unlock(&bdev->bd_fsfreeze_mutex);
582 return sb;
583}
584EXPORT_SYMBOL(freeze_bdev);
585
/**
 * thaw_bdev  -- unlock filesystem
 * @bdev:	blockdevice to unlock
 * @sb:		associated superblock
 *
 * Unlocks the filesystem and marks it writeable again after freeze_bdev().
 */
593int thaw_bdev(struct block_device *bdev, struct super_block *sb)
594{
595 int error = -EINVAL;
596
597 mutex_lock(&bdev->bd_fsfreeze_mutex);
598 if (!bdev->bd_fsfreeze_count)
599 goto out;
600
601 error = 0;
602 if (--bdev->bd_fsfreeze_count > 0)
603 goto out;
604
605 if (!sb)
606 goto out;
607
608 if (sb->s_op->thaw_super)
609 error = sb->s_op->thaw_super(sb);
610 else
611 error = thaw_super(sb);
612 if (error)
613 bdev->bd_fsfreeze_count++;
614out:
615 mutex_unlock(&bdev->bd_fsfreeze_mutex);
616 return error;
617}
618EXPORT_SYMBOL(thaw_bdev);
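/*
 * Illustrative pairing only (not part of this file): a snapshot/backup path
 * would typically bracket its work with
 *
 *	sb = freeze_bdev(bdev);
 *	if (IS_ERR(sb))
 *		return PTR_ERR(sb);
 *	... take the snapshot ...
 *	thaw_bdev(bdev, sb);
 *
 * freeze_bdev() returns the frozen superblock (or NULL if no filesystem is
 * mounted on the device), and that value is handed back to thaw_bdev().
 */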
619
620static int blkdev_writepage(struct page *page, struct writeback_control *wbc)
621{
622 return block_write_full_page(page, blkdev_get_block, wbc);
623}
624
625static int blkdev_readpage(struct file * file, struct page * page)
626{
627 return block_read_full_page(page, blkdev_get_block);
628}
629
630static int blkdev_readpages(struct file *file, struct address_space *mapping,
631 struct list_head *pages, unsigned nr_pages)
632{
633 return mpage_readpages(mapping, pages, nr_pages, blkdev_get_block);
634}
635
636static int blkdev_write_begin(struct file *file, struct address_space *mapping,
637 loff_t pos, unsigned len, unsigned flags,
638 struct page **pagep, void **fsdata)
639{
640 return block_write_begin(mapping, pos, len, flags, pagep,
641 blkdev_get_block);
642}
643
644static int blkdev_write_end(struct file *file, struct address_space *mapping,
645 loff_t pos, unsigned len, unsigned copied,
646 struct page *page, void *fsdata)
647{
648 int ret;
649 ret = block_write_end(file, mapping, pos, len, copied, page, fsdata);
650
651 unlock_page(page);
652 put_page(page);
653
654 return ret;
655}
656
657
658
659
660
661
662static loff_t block_llseek(struct file *file, loff_t offset, int whence)
663{
664 struct inode *bd_inode = bdev_file_inode(file);
665 loff_t retval;
666
667 inode_lock(bd_inode);
668 retval = fixed_size_llseek(file, offset, whence, i_size_read(bd_inode));
669 inode_unlock(bd_inode);
670 return retval;
671}
672
673int blkdev_fsync(struct file *filp, loff_t start, loff_t end, int datasync)
674{
675 struct inode *bd_inode = bdev_file_inode(filp);
676 struct block_device *bdev = I_BDEV(bd_inode);
677 int error;
678
679 error = file_write_and_wait_range(filp, start, end);
680 if (error)
681 return error;
682
	/*
	 * There is no need to serialise calls to blkdev_issue_flush with
	 * i_mutex and doing so causes performance issues with concurrent
	 * O_SYNC writers to a block device.
	 */
688 error = blkdev_issue_flush(bdev, GFP_KERNEL, NULL);
689 if (error == -EOPNOTSUPP)
690 error = 0;
691
692 return error;
693}
694EXPORT_SYMBOL(blkdev_fsync);
695
/**
 * bdev_read_page() - Start reading a page from a block device
 * @bdev: The device to read the page from
 * @sector: The offset on the device to read the page at (need not be aligned)
 * @page: The page to read
 *
 * On entry, the page should be locked.  It will be unlocked when the page
 * has been read.  If the block driver implements rw_page synchronously,
 * that will be true on exit from this function, but it need not be.
 *
 * Errors returned by this function are usually "soft", eg out of memory, or
 * queue full; callers should try a different route to read this page rather
 * than propagate an error back up the stack.
 *
 * Return: negative errno if an error occurs, 0 if submission was successful.
 */
712int bdev_read_page(struct block_device *bdev, sector_t sector,
713 struct page *page)
714{
715 const struct block_device_operations *ops = bdev->bd_disk->fops;
716 int result = -EOPNOTSUPP;
717
718 if (!ops->rw_page || bdev_get_integrity(bdev))
719 return result;
720
721 result = blk_queue_enter(bdev->bd_queue, 0);
722 if (result)
723 return result;
724 result = ops->rw_page(bdev, sector + get_start_sect(bdev), page,
725 REQ_OP_READ);
726 blk_queue_exit(bdev->bd_queue);
727 return result;
728}
729EXPORT_SYMBOL_GPL(bdev_read_page);
730
/**
 * bdev_write_page() - Start writing a page to a block device
 * @bdev: The device to write the page to
 * @sector: The offset on the device to write the page at (need not be aligned)
 * @page: The page to write
 * @wbc: The writeback_control for the write
 *
 * On entry, the page should be locked and not currently under writeback.
 * On exit, if the write started successfully, the page will be unlocked and
 * under writeback.  If the write failed already (eg the driver failed to
 * queue the page to the device), the page will still be locked.  If the
 * caller is a ->writepage implementation, it will need to unlock the page.
 *
 * Errors returned by this function are usually "soft", eg out of memory, or
 * queue full; callers should try a different route to write this page rather
 * than propagate an error back up the stack.
 *
 * Return: negative errno if an error occurs, 0 if submission was successful.
 */
750int bdev_write_page(struct block_device *bdev, sector_t sector,
751 struct page *page, struct writeback_control *wbc)
752{
753 int result;
754 const struct block_device_operations *ops = bdev->bd_disk->fops;
755
756 if (!ops->rw_page || bdev_get_integrity(bdev))
757 return -EOPNOTSUPP;
758 result = blk_queue_enter(bdev->bd_queue, 0);
759 if (result)
760 return result;
761
762 set_page_writeback(page);
763 result = ops->rw_page(bdev, sector + get_start_sect(bdev), page,
764 REQ_OP_WRITE);
765 if (result) {
766 end_page_writeback(page);
767 } else {
768 clean_page_buffers(page);
769 unlock_page(page);
770 }
771 blk_queue_exit(bdev->bd_queue);
772 return result;
773}
774EXPORT_SYMBOL_GPL(bdev_write_page);
775
/*
 * pseudo-fs
 */

780static __cacheline_aligned_in_smp DEFINE_SPINLOCK(bdev_lock);
781static struct kmem_cache * bdev_cachep __read_mostly;
782
783static struct inode *bdev_alloc_inode(struct super_block *sb)
784{
785 struct bdev_inode *ei = kmem_cache_alloc(bdev_cachep, GFP_KERNEL);
786 if (!ei)
787 return NULL;
788 return &ei->vfs_inode;
789}
790
791static void bdev_i_callback(struct rcu_head *head)
792{
793 struct inode *inode = container_of(head, struct inode, i_rcu);
794 struct bdev_inode *bdi = BDEV_I(inode);
795
796 kmem_cache_free(bdev_cachep, bdi);
797}
798
799static void bdev_destroy_inode(struct inode *inode)
800{
801 call_rcu(&inode->i_rcu, bdev_i_callback);
802}
803
804static void init_once(void *foo)
805{
806 struct bdev_inode *ei = (struct bdev_inode *) foo;
807 struct block_device *bdev = &ei->bdev;
808
809 memset(bdev, 0, sizeof(*bdev));
810 mutex_init(&bdev->bd_mutex);
811 INIT_LIST_HEAD(&bdev->bd_list);
812#ifdef CONFIG_SYSFS
813 INIT_LIST_HEAD(&bdev->bd_holder_disks);
814#endif
815 bdev->bd_bdi = &noop_backing_dev_info;
816 inode_init_once(&ei->vfs_inode);
817
818 mutex_init(&bdev->bd_fsfreeze_mutex);
819}
820
821static void bdev_evict_inode(struct inode *inode)
822{
823 struct block_device *bdev = &BDEV_I(inode)->bdev;
824 truncate_inode_pages_final(&inode->i_data);
825 invalidate_inode_buffers(inode);
826 clear_inode(inode);
827 spin_lock(&bdev_lock);
828 list_del_init(&bdev->bd_list);
829 spin_unlock(&bdev_lock);
	/* Detach inode from wb early as bdi_put() may free bdi->wb */
831 inode_detach_wb(inode);
832 if (bdev->bd_bdi != &noop_backing_dev_info) {
833 bdi_put(bdev->bd_bdi);
834 bdev->bd_bdi = &noop_backing_dev_info;
835 }
836}
837
838static const struct super_operations bdev_sops = {
839 .statfs = simple_statfs,
840 .alloc_inode = bdev_alloc_inode,
841 .destroy_inode = bdev_destroy_inode,
842 .drop_inode = generic_delete_inode,
843 .evict_inode = bdev_evict_inode,
844};
845
846static struct dentry *bd_mount(struct file_system_type *fs_type,
847 int flags, const char *dev_name, void *data)
848{
849 struct dentry *dent;
850 dent = mount_pseudo(fs_type, "bdev:", &bdev_sops, NULL, BDEVFS_MAGIC);
851 if (!IS_ERR(dent))
852 dent->d_sb->s_iflags |= SB_I_CGROUPWB;
853 return dent;
854}
855
856static struct file_system_type bd_type = {
857 .name = "bdev",
858 .mount = bd_mount,
859 .kill_sb = kill_anon_super,
860};
861
862struct super_block *blockdev_superblock __read_mostly;
863EXPORT_SYMBOL_GPL(blockdev_superblock);
864
865void __init bdev_cache_init(void)
866{
867 int err;
868 static struct vfsmount *bd_mnt;
869
870 bdev_cachep = kmem_cache_create("bdev_cache", sizeof(struct bdev_inode),
871 0, (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|
872 SLAB_MEM_SPREAD|SLAB_ACCOUNT|SLAB_PANIC),
873 init_once);
874 err = register_filesystem(&bd_type);
875 if (err)
876 panic("Cannot register bdev pseudo-fs");
877 bd_mnt = kern_mount(&bd_type);
878 if (IS_ERR(bd_mnt))
879 panic("Cannot create bdev pseudo-fs");
880 blockdev_superblock = bd_mnt->mnt_sb;
881}
882
/*
 * Most likely a _very_ bad hash - but then it's hardly critical for small
 * /dev and can be fixed when somebody needs a really large one.
 * Keep in mind that it will be fed through the icache hash function too.
 */
888static inline unsigned long hash(dev_t dev)
889{
890 return MAJOR(dev)+MINOR(dev);
891}
892
893static int bdev_test(struct inode *inode, void *data)
894{
895 return BDEV_I(inode)->bdev.bd_dev == *(dev_t *)data;
896}
897
898static int bdev_set(struct inode *inode, void *data)
899{
900 BDEV_I(inode)->bdev.bd_dev = *(dev_t *)data;
901 return 0;
902}
903
904static LIST_HEAD(all_bdevs);
905
/*
 * If there is a bdev inode for this device, unhash it so that it gets evicted
 * as soon as the last inode reference is dropped.
 */
910void bdev_unhash_inode(dev_t dev)
911{
912 struct inode *inode;
913
914 inode = ilookup5(blockdev_superblock, hash(dev), bdev_test, &dev);
915 if (inode) {
916 remove_inode_hash(inode);
917 iput(inode);
918 }
919}
920
921struct block_device *bdget(dev_t dev)
922{
923 struct block_device *bdev;
924 struct inode *inode;
925
926 inode = iget5_locked(blockdev_superblock, hash(dev),
927 bdev_test, bdev_set, &dev);
928
929 if (!inode)
930 return NULL;
931
932 bdev = &BDEV_I(inode)->bdev;
933
934 if (inode->i_state & I_NEW) {
935 bdev->bd_contains = NULL;
936 bdev->bd_super = NULL;
937 bdev->bd_inode = inode;
938 bdev->bd_block_size = i_blocksize(inode);
939 bdev->bd_part_count = 0;
940 bdev->bd_invalidated = 0;
941 inode->i_mode = S_IFBLK;
942 inode->i_rdev = dev;
943 inode->i_bdev = bdev;
944 inode->i_data.a_ops = &def_blk_aops;
945 mapping_set_gfp_mask(&inode->i_data, GFP_USER);
946 spin_lock(&bdev_lock);
947 list_add(&bdev->bd_list, &all_bdevs);
948 spin_unlock(&bdev_lock);
949 unlock_new_inode(inode);
950 }
951 return bdev;
952}
953
954EXPORT_SYMBOL(bdget);
955
/**
 * bdgrab -- Grab a reference to an already referenced block device
 * @bdev:	Block device to grab a reference to.
 */
960struct block_device *bdgrab(struct block_device *bdev)
961{
962 ihold(bdev->bd_inode);
963 return bdev;
964}
965EXPORT_SYMBOL(bdgrab);
966
967long nr_blockdev_pages(void)
968{
969 struct block_device *bdev;
970 long ret = 0;
971 spin_lock(&bdev_lock);
972 list_for_each_entry(bdev, &all_bdevs, bd_list) {
973 ret += bdev->bd_inode->i_mapping->nrpages;
974 }
975 spin_unlock(&bdev_lock);
976 return ret;
977}
978
979void bdput(struct block_device *bdev)
980{
981 iput(bdev->bd_inode);
982}
983
984EXPORT_SYMBOL(bdput);
985
986static struct block_device *bd_acquire(struct inode *inode)
987{
988 struct block_device *bdev;
989
990 spin_lock(&bdev_lock);
991 bdev = inode->i_bdev;
992 if (bdev && !inode_unhashed(bdev->bd_inode)) {
993 bdgrab(bdev);
994 spin_unlock(&bdev_lock);
995 return bdev;
996 }
997 spin_unlock(&bdev_lock);
998
	/*
	 * i_bdev references a block device inode that has already been
	 * unhashed (the underlying device went away).  Drop the stale
	 * reference and look the device up again.
	 */
1005 if (bdev)
1006 bd_forget(inode);
1007
1008 bdev = bdget(inode->i_rdev);
1009 if (bdev) {
1010 spin_lock(&bdev_lock);
1011 if (!inode->i_bdev) {
			/*
			 * We take an additional reference to bd_inode,
			 * and it's released in clear_inode() of inode.
			 * So, we can access it via ->i_mapping always
			 * without igrab().
			 */
1018 bdgrab(bdev);
1019 inode->i_bdev = bdev;
1020 inode->i_mapping = bdev->bd_inode->i_mapping;
1021 }
1022 spin_unlock(&bdev_lock);
1023 }
1024 return bdev;
1025}
1026
1027
1028
1029void bd_forget(struct inode *inode)
1030{
1031 struct block_device *bdev = NULL;
1032
1033 spin_lock(&bdev_lock);
1034 if (!sb_is_blkdev_sb(inode->i_sb))
1035 bdev = inode->i_bdev;
1036 inode->i_bdev = NULL;
1037 inode->i_mapping = &inode->i_data;
1038 spin_unlock(&bdev_lock);
1039
1040 if (bdev)
1041 bdput(bdev);
1042}
1043
/**
 * bd_may_claim - test whether a block device can be claimed
 * @bdev: block device of interest
 * @whole: whole block device containing @bdev, may equal @bdev
 * @holder: holder trying to claim @bdev
 *
 * Test whether @bdev can be claimed by @holder.
 *
 * CONTEXT:
 * spin_lock(&bdev_lock).
 *
 * RETURNS:
 * %true if @bdev can be claimed, %false otherwise.
 */
1058static bool bd_may_claim(struct block_device *bdev, struct block_device *whole,
1059 void *holder)
1060{
1061 if (bdev->bd_holder == holder)
1062 return true;
1063 else if (bdev->bd_holder != NULL)
1064 return false;
1065 else if (whole == bdev)
1066 return true;
1067
1068 else if (whole->bd_holder == bd_may_claim)
1069 return true;
1070 else if (whole->bd_holder != NULL)
1071 return false;
1072 else
1073 return true;
1074}
1075
/**
 * bd_prepare_to_claim - prepare to claim a block device
 * @bdev: block device of interest
 * @whole: the whole device containing @bdev, may equal @bdev
 * @holder: holder trying to claim @bdev
 *
 * Prepare to claim @bdev.  This function fails if @bdev is already
 * claimed by another holder and waits if another claiming is in
 * progress.  This function doesn't actually claim.  On successful
 * return, the caller has ownership of bd_claiming and bd_holder[s].
 *
 * CONTEXT:
 * spin_lock(&bdev_lock).  Might release bdev_lock, sleep and regrab
 * it multiple times.
 *
 * RETURNS:
 * 0 if @bdev can be claimed, -EBUSY otherwise.
 */
1094static int bd_prepare_to_claim(struct block_device *bdev,
1095 struct block_device *whole, void *holder)
1096{
1097retry:
	/* if someone else claimed, fail */
1099 if (!bd_may_claim(bdev, whole, holder))
1100 return -EBUSY;
1101
	/* if claiming is already in progress, wait for it to finish */
1103 if (whole->bd_claiming) {
1104 wait_queue_head_t *wq = bit_waitqueue(&whole->bd_claiming, 0);
1105 DEFINE_WAIT(wait);
1106
1107 prepare_to_wait(wq, &wait, TASK_UNINTERRUPTIBLE);
1108 spin_unlock(&bdev_lock);
1109 schedule();
1110 finish_wait(wq, &wait);
1111 spin_lock(&bdev_lock);
1112 goto retry;
1113 }
1114
1115
1116 return 0;
1117}
1118
1119static struct gendisk *bdev_get_gendisk(struct block_device *bdev, int *partno)
1120{
1121 struct gendisk *disk = get_gendisk(bdev->bd_dev, partno);
1122
1123 if (!disk)
1124 return NULL;
1125
	/*
	 * Now that we hold a gendisk reference, make sure the bdev we looked
	 * up is not stale.  If it is, the device was removed and re-created
	 * while we were looking up the gendisk and we must fail the open:
	 * associating an unhashed bdev with a newly created gendisk could
	 * lead to two bdevs (and thus two independent page caches) being
	 * associated with one device, which is bad.
	 */
1133 if (inode_unhashed(bdev->bd_inode)) {
1134 put_disk_and_module(disk);
1135 return NULL;
1136 }
1137 return disk;
1138}
1139
/**
 * bd_start_claiming - start claiming a block device
 * @bdev: block device of interest
 * @holder: holder trying to claim @bdev
 *
 * @bdev is about to be opened exclusively.  Check @bdev can be opened
 * exclusively and mark that an exclusive open is in progress.  Each
 * successful call to this function must be matched with a call to
 * either bd_finish_claiming() or bd_abort_claiming() (which do not
 * fail).
 *
 * This function is used to gain exclusive access to the block device
 * without actually causing other exclusive open attempts to fail.  It
 * should be used when the open sequence itself requires exclusive
 * access but may subsequently fail.
 *
 * CONTEXT:
 * Might sleep.
 *
 * RETURNS:
 * Pointer to the block device containing @bdev on success, ERR_PTR()
 * value on failure.
 */
1163struct block_device *bd_start_claiming(struct block_device *bdev, void *holder)
1164{
1165 struct gendisk *disk;
1166 struct block_device *whole;
1167 int partno, err;
1168
1169 might_sleep();
1170
	/*
	 * @bdev might not have been initialized properly yet, look up
	 * and grab the outer block device the hard way.
	 */
1175 disk = bdev_get_gendisk(bdev, &partno);
1176 if (!disk)
1177 return ERR_PTR(-ENXIO);
1178
	/*
	 * Normally, @bdev should equal what's returned from bdget_disk()
	 * if partno is 0; however, some drivers (floppy) use multiple
	 * bdev's for the same physical device and @bdev may be one of the
	 * aliases.  Keep @bdev if partno is 0.  This means claimer
	 * tracking is broken for those devices but it has always been that
	 * way.
	 */
1187 if (partno)
1188 whole = bdget_disk(disk, 0);
1189 else
1190 whole = bdgrab(bdev);
1191
1192 put_disk_and_module(disk);
1193 if (!whole)
1194 return ERR_PTR(-ENOMEM);
1195
	/* prepare to claim; if successful, mark claiming in progress */
1197 spin_lock(&bdev_lock);
1198
1199 err = bd_prepare_to_claim(bdev, whole, holder);
1200 if (err == 0) {
1201 whole->bd_claiming = holder;
1202 spin_unlock(&bdev_lock);
1203 return whole;
1204 } else {
1205 spin_unlock(&bdev_lock);
1206 bdput(whole);
1207 return ERR_PTR(err);
1208 }
1209}
1210EXPORT_SYMBOL(bd_start_claiming);
1211
1212static void bd_clear_claiming(struct block_device *whole, void *holder)
1213{
1214 lockdep_assert_held(&bdev_lock);
1215
1216 BUG_ON(whole->bd_claiming != holder);
1217 whole->bd_claiming = NULL;
1218 wake_up_bit(&whole->bd_claiming, 0);
1219}
/**
 * bd_finish_claiming - finish claiming of a block device
 * @bdev: block device of interest
 * @whole: whole block device (returned from bd_start_claiming())
 * @holder: holder that has claimed @bdev
 *
 * Finish exclusive open of a block device.  Mark the device as exclusively
 * open by the holder and wake up all waiters for exclusive open.
 */
1230void bd_finish_claiming(struct block_device *bdev, struct block_device *whole,
1231 void *holder)
1232{
1233 spin_lock(&bdev_lock);
1234 BUG_ON(!bd_may_claim(bdev, whole, holder));
	/*
	 * Note that for a whole device bd_holders will be incremented twice,
	 * and bd_holder will be set to bd_may_claim before being set to holder
	 */
1239 whole->bd_holders++;
1240 whole->bd_holder = bd_may_claim;
1241 bdev->bd_holders++;
1242 bdev->bd_holder = holder;
1243 bd_clear_claiming(whole, holder);
1244 spin_unlock(&bdev_lock);
1245}
1246EXPORT_SYMBOL(bd_finish_claiming);
1247
/**
 * bd_abort_claiming - abort claiming of a block device
 * @bdev: block device of interest
 * @whole: whole block device (returned from bd_start_claiming())
 * @holder: holder that has claimed @bdev
 *
 * Abort claiming of a block device when the exclusive open failed.  This can
 * also be used when exclusive open is not actually desired and we just needed
 * to block other exclusive openers for a while.
 */
1258void bd_abort_claiming(struct block_device *bdev, struct block_device *whole,
1259 void *holder)
1260{
1261 spin_lock(&bdev_lock);
1262 bd_clear_claiming(whole, holder);
1263 spin_unlock(&bdev_lock);
1264}
1265EXPORT_SYMBOL(bd_abort_claiming);
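/*
 * Illustrative claiming sequence (it mirrors what blkdev_get() does below);
 * this is not an additional API:
 *
 *	whole = bd_start_claiming(bdev, holder);
 *	if (IS_ERR(whole))
 *		return PTR_ERR(whole);
 *	err = ...open/setup that must happen under the pending claim...;
 *	if (err)
 *		bd_abort_claiming(bdev, whole, holder);
 *	else
 *		bd_finish_claiming(bdev, whole, holder);
 *	bdput(whole);
 */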
1266
1267#ifdef CONFIG_SYSFS
1268struct bd_holder_disk {
1269 struct list_head list;
1270 struct gendisk *disk;
1271 int refcnt;
1272};
1273
1274static struct bd_holder_disk *bd_find_holder_disk(struct block_device *bdev,
1275 struct gendisk *disk)
1276{
1277 struct bd_holder_disk *holder;
1278
1279 list_for_each_entry(holder, &bdev->bd_holder_disks, list)
1280 if (holder->disk == disk)
1281 return holder;
1282 return NULL;
1283}
1284
1285static int add_symlink(struct kobject *from, struct kobject *to)
1286{
1287 return sysfs_create_link(from, to, kobject_name(to));
1288}
1289
1290static void del_symlink(struct kobject *from, struct kobject *to)
1291{
1292 sysfs_remove_link(from, kobject_name(to));
1293}
1294
/**
 * bd_link_disk_holder - create symlinks between holding disk and slave bdev
 * @bdev: the claimed slave bdev
 * @disk: the holding disk
 *
 * DON'T USE THIS UNLESS YOU'RE ALREADY USING IT.
 *
 * This function creates the following sysfs symlinks.
 *
 * - from "slaves" directory of the holder @disk to the claimed @bdev
 * - from "holders" directory of the @bdev to the holder @disk
 *
 * For example, if /dev/dm-0 maps to /dev/sda and disk for dm-0 is
 * passed to bd_link_disk_holder(), then:
 *
 *   /sys/block/dm-0/slaves/sda --> /sys/block/sda
 *   /sys/block/sda/holders/dm-0 --> /sys/block/dm-0
 *
 * The caller must have claimed @bdev before calling this function and
 * ensure that both @bdev and @disk are valid during the creation and
 * lifetime of these symlinks.
 *
 * CONTEXT:
 * Might sleep.
 *
 * RETURNS:
 * 0 on success, -errno on failure.
 */
1323int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk)
1324{
1325 struct bd_holder_disk *holder;
1326 int ret = 0;
1327
1328 mutex_lock(&bdev->bd_mutex);
1329
1330 WARN_ON_ONCE(!bdev->bd_holder);
1331
1332
1333 if (WARN_ON(!disk->slave_dir || !bdev->bd_part->holder_dir))
1334 goto out_unlock;
1335
1336 holder = bd_find_holder_disk(bdev, disk);
1337 if (holder) {
1338 holder->refcnt++;
1339 goto out_unlock;
1340 }
1341
1342 holder = kzalloc(sizeof(*holder), GFP_KERNEL);
1343 if (!holder) {
1344 ret = -ENOMEM;
1345 goto out_unlock;
1346 }
1347
1348 INIT_LIST_HEAD(&holder->list);
1349 holder->disk = disk;
1350 holder->refcnt = 1;
1351
1352 ret = add_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj);
1353 if (ret)
1354 goto out_free;
1355
1356 ret = add_symlink(bdev->bd_part->holder_dir, &disk_to_dev(disk)->kobj);
1357 if (ret)
1358 goto out_del;
1359
	/*
	 * bdev could be deleted beneath us which would implicitly destroy
	 * the holder directory.  Hold on to it.
	 */
1363 kobject_get(bdev->bd_part->holder_dir);
1364
1365 list_add(&holder->list, &bdev->bd_holder_disks);
1366 goto out_unlock;
1367
1368out_del:
1369 del_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj);
1370out_free:
1371 kfree(holder);
1372out_unlock:
1373 mutex_unlock(&bdev->bd_mutex);
1374 return ret;
1375}
1376EXPORT_SYMBOL_GPL(bd_link_disk_holder);
1377
/**
 * bd_unlink_disk_holder - destroy symlinks created by bd_link_disk_holder()
 * @bdev: the claimed slave bdev
 * @disk: the holding disk
 *
 * DON'T USE THIS UNLESS YOU'RE ALREADY USING IT.
 *
 * CONTEXT:
 * Might sleep.
 */
1388void bd_unlink_disk_holder(struct block_device *bdev, struct gendisk *disk)
1389{
1390 struct bd_holder_disk *holder;
1391
1392 mutex_lock(&bdev->bd_mutex);
1393
1394 holder = bd_find_holder_disk(bdev, disk);
1395
1396 if (!WARN_ON_ONCE(holder == NULL) && !--holder->refcnt) {
1397 del_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj);
1398 del_symlink(bdev->bd_part->holder_dir,
1399 &disk_to_dev(disk)->kobj);
1400 kobject_put(bdev->bd_part->holder_dir);
1401 list_del_init(&holder->list);
1402 kfree(holder);
1403 }
1404
1405 mutex_unlock(&bdev->bd_mutex);
1406}
1407EXPORT_SYMBOL_GPL(bd_unlink_disk_holder);
1408#endif
1409
/**
 * flush_disk - invalidates all buffer-cache entries on a disk
 * @bdev:	struct block device to be flushed
 * @kill_dirty: flag to guide handling of dirty inodes
 *
 * Invalidates all buffer-cache entries on a disk.  It should be called
 * when a disk has been changed -- either by a media change or online
 * resize.
 */
1420static void flush_disk(struct block_device *bdev, bool kill_dirty)
1421{
1422 if (__invalidate_device(bdev, kill_dirty)) {
1423 printk(KERN_WARNING "VFS: busy inodes on changed media or "
1424 "resized disk %s\n",
1425 bdev->bd_disk ? bdev->bd_disk->disk_name : "");
1426 }
1427 bdev->bd_invalidated = 1;
1428}
1429
/**
 * check_disk_size_change - checks for a disk size change and adjusts bdev size.
 * @disk: struct gendisk to check
 * @bdev: struct bdev to adjust.
 * @verbose: if %true log a message about a size change if there is any
 *
 * This routine checks to see if the bdev size does not match the disk size
 * and adjusts it if it differs.  When shrinking the bdev size, all of its
 * caches are freed.
 */
1440static void check_disk_size_change(struct gendisk *disk,
1441 struct block_device *bdev, bool verbose)
1442{
1443 loff_t disk_size, bdev_size;
1444
1445 disk_size = (loff_t)get_capacity(disk) << 9;
1446 bdev_size = i_size_read(bdev->bd_inode);
1447 if (disk_size != bdev_size) {
1448 if (verbose) {
1449 printk(KERN_INFO
1450 "%s: detected capacity change from %lld to %lld\n",
1451 disk->disk_name, bdev_size, disk_size);
1452 }
1453 i_size_write(bdev->bd_inode, disk_size);
1454 if (bdev_size > disk_size)
1455 flush_disk(bdev, false);
1456 }
1457 bdev->bd_invalidated = 0;
1458}
1459
/**
 * revalidate_disk - wrapper for lower-level driver's revalidate_disk call-back
 * @disk: struct gendisk to be revalidated
 *
 * This routine is a wrapper for lower-level driver's revalidate_disk
 * call-backs.  It is used to do common pre and post operations needed
 * for all revalidate_disk operations.
 */
1468int revalidate_disk(struct gendisk *disk)
1469{
1470 int ret = 0;
1471
1472 if (disk->fops->revalidate_disk)
1473 ret = disk->fops->revalidate_disk(disk);
1474
	/*
	 * Hidden disks don't have an associated bdev, so there's no point
	 * in revalidating their size.
	 */
1479 if (!(disk->flags & GENHD_FL_HIDDEN)) {
1480 struct block_device *bdev = bdget_disk(disk, 0);
1481
1482 if (!bdev)
1483 return ret;
1484
1485 mutex_lock(&bdev->bd_mutex);
1486 check_disk_size_change(disk, bdev, ret == 0);
1487 mutex_unlock(&bdev->bd_mutex);
1488 bdput(bdev);
1489 }
1490 return ret;
1491}
1492EXPORT_SYMBOL(revalidate_disk);
1493
/*
 * This routine checks whether a removable media has been changed,
 * and invalidates all buffer-cache-entries in that case.  This
 * is a relatively slow routine, so we have to try to minimize using
 * it.  Thus it is called only upon a 'mount' or 'open'.  This
 * is the best way of combining speed and utility, I think.
 * People changing diskettes in the middle of an operation deserve
 * to lose :-)
 */
1503int check_disk_change(struct block_device *bdev)
1504{
1505 struct gendisk *disk = bdev->bd_disk;
1506 const struct block_device_operations *bdops = disk->fops;
1507 unsigned int events;
1508
1509 events = disk_clear_events(disk, DISK_EVENT_MEDIA_CHANGE |
1510 DISK_EVENT_EJECT_REQUEST);
1511 if (!(events & DISK_EVENT_MEDIA_CHANGE))
1512 return 0;
1513
1514 flush_disk(bdev, true);
1515 if (bdops->revalidate_disk)
1516 bdops->revalidate_disk(bdev->bd_disk);
1517 return 1;
1518}
1519
1520EXPORT_SYMBOL(check_disk_change);
1521
1522void bd_set_size(struct block_device *bdev, loff_t size)
1523{
1524 inode_lock(bdev->bd_inode);
1525 i_size_write(bdev->bd_inode, size);
1526 inode_unlock(bdev->bd_inode);
1527}
1528EXPORT_SYMBOL(bd_set_size);
1529
1530static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part);
1531
1532int bdev_disk_changed(struct block_device *bdev, bool invalidate)
1533{
1534 struct gendisk *disk = bdev->bd_disk;
1535 int ret;
1536
1537 lockdep_assert_held(&bdev->bd_mutex);
1538
1539rescan:
1540 ret = blk_drop_partitions(disk, bdev);
1541 if (ret)
1542 return ret;
1543
1544 if (invalidate)
1545 set_capacity(disk, 0);
1546 else if (disk->fops->revalidate_disk)
1547 disk->fops->revalidate_disk(disk);
1548
1549 check_disk_size_change(disk, bdev, !invalidate);
1550
1551 if (get_capacity(disk)) {
1552 ret = blk_add_partitions(disk, bdev);
1553 if (ret == -EAGAIN)
1554 goto rescan;
1555 } else if (invalidate) {
		/*
		 * Tell userspace that the media / partition table may have
		 * changed.
		 */
1560 kobject_uevent(&disk_to_dev(disk)->kobj, KOBJ_CHANGE);
1561 }
1562
1563 return ret;
1564}
1565
/*
 * Only exported for loop and dasd for historic reasons.  Don't use in new
 * code!
 */
1569EXPORT_SYMBOL_GPL(bdev_disk_changed);
1570
/*
 * bd_mutex locking:
 *
 *  mutex_lock(part->bd_mutex)
 *    mutex_lock_nested(whole->bd_mutex, 1)
 */
1578static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
1579{
1580 struct gendisk *disk;
1581 int ret;
1582 int partno;
1583 int perm = 0;
1584 bool first_open = false;
1585
1586 if (mode & FMODE_READ)
1587 perm |= MAY_READ;
1588 if (mode & FMODE_WRITE)
1589 perm |= MAY_WRITE;
1590
1591
1592
1593 if (!for_part) {
1594 ret = devcgroup_inode_permission(bdev->bd_inode, perm);
1595 if (ret != 0)
1596 return ret;
1597 }
1598
1599 restart:
1600
1601 ret = -ENXIO;
1602 disk = bdev_get_gendisk(bdev, &partno);
1603 if (!disk)
1604 goto out;
1605
1606 disk_block_events(disk);
1607 mutex_lock_nested(&bdev->bd_mutex, for_part);
1608 if (!bdev->bd_openers) {
1609 first_open = true;
1610 bdev->bd_disk = disk;
1611 bdev->bd_queue = disk->queue;
1612 bdev->bd_contains = bdev;
1613 bdev->bd_partno = partno;
1614
1615 if (!partno) {
1616 ret = -ENXIO;
1617 bdev->bd_part = disk_get_part(disk, partno);
1618 if (!bdev->bd_part)
1619 goto out_clear;
1620
1621 ret = 0;
1622 if (disk->fops->open) {
1623 ret = disk->fops->open(bdev, mode);
1624 if (ret == -ERESTARTSYS) {
					/* Lost a race with 'disk' being
					 * deleted, try again.  See md.c.
					 */
1629 disk_put_part(bdev->bd_part);
1630 bdev->bd_part = NULL;
1631 bdev->bd_disk = NULL;
1632 bdev->bd_queue = NULL;
1633 mutex_unlock(&bdev->bd_mutex);
1634 disk_unblock_events(disk);
1635 put_disk_and_module(disk);
1636 goto restart;
1637 }
1638 }
1639
1640 if (!ret) {
1641 bd_set_size(bdev,(loff_t)get_capacity(disk)<<9);
1642 set_init_blocksize(bdev);
1643 }
1644
			/*
			 * If the device is invalidated, rescan partitions
			 * if open succeeded or failed with -ENOMEDIUM.
			 * The latter is necessary to prevent ghost
			 * partitions on a removed medium.
			 */
1651 if (bdev->bd_invalidated &&
1652 (!ret || ret == -ENOMEDIUM))
1653 bdev_disk_changed(bdev, ret == -ENOMEDIUM);
1654
1655 if (ret)
1656 goto out_clear;
1657 } else {
1658 struct block_device *whole;
1659 whole = bdget_disk(disk, 0);
1660 ret = -ENOMEM;
1661 if (!whole)
1662 goto out_clear;
1663 BUG_ON(for_part);
1664 ret = __blkdev_get(whole, mode, 1);
1665 if (ret) {
1666 bdput(whole);
1667 goto out_clear;
1668 }
1669 bdev->bd_contains = whole;
1670 bdev->bd_part = disk_get_part(disk, partno);
1671 if (!(disk->flags & GENHD_FL_UP) ||
1672 !bdev->bd_part || !bdev->bd_part->nr_sects) {
1673 ret = -ENXIO;
1674 goto out_clear;
1675 }
1676 bd_set_size(bdev, (loff_t)bdev->bd_part->nr_sects << 9);
1677 set_init_blocksize(bdev);
1678 }
1679
1680 if (bdev->bd_bdi == &noop_backing_dev_info)
1681 bdev->bd_bdi = bdi_get(disk->queue->backing_dev_info);
1682 } else {
1683 if (bdev->bd_contains == bdev) {
1684 ret = 0;
1685 if (bdev->bd_disk->fops->open)
1686 ret = bdev->bd_disk->fops->open(bdev, mode);
			/* the same as the first-opener case, read the comment there */
1688 if (bdev->bd_invalidated &&
1689 (!ret || ret == -ENOMEDIUM))
1690 bdev_disk_changed(bdev, ret == -ENOMEDIUM);
1691 if (ret)
1692 goto out_unlock_bdev;
1693 }
1694 }
1695 bdev->bd_openers++;
1696 if (for_part)
1697 bdev->bd_part_count++;
1698 mutex_unlock(&bdev->bd_mutex);
1699 disk_unblock_events(disk);
	/* only the first opener holds refs to the module and disk */
1701 if (!first_open)
1702 put_disk_and_module(disk);
1703 return 0;
1704
1705 out_clear:
1706 disk_put_part(bdev->bd_part);
1707 bdev->bd_disk = NULL;
1708 bdev->bd_part = NULL;
1709 bdev->bd_queue = NULL;
1710 if (bdev != bdev->bd_contains)
1711 __blkdev_put(bdev->bd_contains, mode, 1);
1712 bdev->bd_contains = NULL;
1713 out_unlock_bdev:
1714 mutex_unlock(&bdev->bd_mutex);
1715 disk_unblock_events(disk);
1716 put_disk_and_module(disk);
1717 out:
1718
1719 return ret;
1720}
1721
/**
 * blkdev_get - open a block device
 * @bdev: block_device to open
 * @mode: FMODE_* mask
 * @holder: exclusive holder identifier
 *
 * Open @bdev with @mode.  If @mode includes %FMODE_EXCL, @bdev is
 * open with exclusive access.  Specifying %FMODE_EXCL with %NULL
 * @holder is invalid.  Exclusive opens may nest for the same @holder.
 *
 * On success, the reference count of @bdev is unchanged.  On failure,
 * @bdev is put.
 *
 * CONTEXT:
 * Might sleep.
 *
 * RETURNS:
 * 0 on success, -errno on failure.
 */
1741int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder)
1742{
1743 struct block_device *whole = NULL;
1744 int res;
1745
1746 WARN_ON_ONCE((mode & FMODE_EXCL) && !holder);
1747
1748 if ((mode & FMODE_EXCL) && holder) {
1749 whole = bd_start_claiming(bdev, holder);
1750 if (IS_ERR(whole)) {
1751 bdput(bdev);
1752 return PTR_ERR(whole);
1753 }
1754 }
1755
1756 res = __blkdev_get(bdev, mode, 0);
1757
1758 if (whole) {
1759 struct gendisk *disk = whole->bd_disk;
1760
		/* finish claiming */
1762 mutex_lock(&bdev->bd_mutex);
1763 if (!res)
1764 bd_finish_claiming(bdev, whole, holder);
1765 else
1766 bd_abort_claiming(bdev, whole, holder);
		/*
		 * Block event polling for write claims if requested.  Any
		 * write holder makes the write_holder state stick until
		 * all are released.  This is good enough and tracking
		 * individual writeable reference is too fragile given the
		 * way @mode is used in blkdev_get/put().
		 */
1774 if (!res && (mode & FMODE_WRITE) && !bdev->bd_write_holder &&
1775 (disk->flags & GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE)) {
1776 bdev->bd_write_holder = true;
1777 disk_block_events(disk);
1778 }
1779
1780 mutex_unlock(&bdev->bd_mutex);
1781 bdput(whole);
1782 }
1783
1784 if (res)
1785 bdput(bdev);
1786
1787 return res;
1788}
1789EXPORT_SYMBOL(blkdev_get);
1790
/**
 * blkdev_get_by_path - open a block device by name
 * @path: path to the block device to open
 * @mode: FMODE_* mask
 * @holder: exclusive holder identifier
 *
 * Open the blockdevice described by the device file at @path.  @mode
 * and @holder are identical to blkdev_get().
 *
 * On success, the returned block_device has a reference count of one.
 *
 * CONTEXT:
 * Might sleep.
 *
 * RETURNS:
 * Pointer to block_device on success, ERR_PTR(-errno) on failure.
 */
1808struct block_device *blkdev_get_by_path(const char *path, fmode_t mode,
1809 void *holder)
1810{
1811 struct block_device *bdev;
1812 int err;
1813
1814 bdev = lookup_bdev(path);
1815 if (IS_ERR(bdev))
1816 return bdev;
1817
1818 err = blkdev_get(bdev, mode, holder);
1819 if (err)
1820 return ERR_PTR(err);
1821
1822 if ((mode & FMODE_WRITE) && bdev_read_only(bdev)) {
1823 blkdev_put(bdev, mode);
1824 return ERR_PTR(-EACCES);
1825 }
1826
1827 return bdev;
1828}
1829EXPORT_SYMBOL(blkdev_get_by_path);
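/*
 * Illustrative caller only (not part of this file): mount-time code
 * typically opens its backing device exclusively along these lines,
 *
 *	bdev = blkdev_get_by_path("/dev/sda1",
 *				  FMODE_READ | FMODE_WRITE | FMODE_EXCL,
 *				  fs_type);
 *	if (IS_ERR(bdev))
 *		return PTR_ERR(bdev);
 *	...
 *	blkdev_put(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
 *
 * where the fs_type pointer is used purely as the exclusive-holder cookie.
 */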
1830
/**
 * blkdev_get_by_dev - open a block device by device number
 * @dev: device number of block device to open
 * @mode: FMODE_* mask
 * @holder: exclusive holder identifier
 *
 * Open the blockdevice described by device number @dev.  @mode and
 * @holder are identical to blkdev_get().
 *
 * Use it ONLY if you really do not have anything better - i.e. when
 * you are behind a truly sucky interface and all you are given is a
 * device number.  _Never_ to be used for internal purposes.  If you
 * ever need it - reconsider your API.
 *
 * On success, the returned block_device has a reference count of one.
 *
 * CONTEXT:
 * Might sleep.
 *
 * RETURNS:
 * Pointer to block_device on success, ERR_PTR(-errno) on failure.
 */
1853struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder)
1854{
1855 struct block_device *bdev;
1856 int err;
1857
1858 bdev = bdget(dev);
1859 if (!bdev)
1860 return ERR_PTR(-ENOMEM);
1861
1862 err = blkdev_get(bdev, mode, holder);
1863 if (err)
1864 return ERR_PTR(err);
1865
1866 return bdev;
1867}
1868EXPORT_SYMBOL(blkdev_get_by_dev);
1869
1870static int blkdev_open(struct inode * inode, struct file * filp)
1871{
1872 struct block_device *bdev;
1873
	/*
	 * Preserve backwards compatibility and allow large file access
	 * even if userspace doesn't ask for it explicitly.  Some mkfs
	 * binaries need it.  We might want to drop this workaround
	 * during an unstable branch.
	 */
1880 filp->f_flags |= O_LARGEFILE;
1881
1882 filp->f_mode |= FMODE_NOWAIT;
1883
1884 if (filp->f_flags & O_NDELAY)
1885 filp->f_mode |= FMODE_NDELAY;
1886 if (filp->f_flags & O_EXCL)
1887 filp->f_mode |= FMODE_EXCL;
1888 if ((filp->f_flags & O_ACCMODE) == 3)
1889 filp->f_mode |= FMODE_WRITE_IOCTL;
1890
1891 bdev = bd_acquire(inode);
1892 if (bdev == NULL)
1893 return -ENOMEM;
1894
1895 filp->f_mapping = bdev->bd_inode->i_mapping;
1896 filp->f_wb_err = filemap_sample_wb_err(filp->f_mapping);
1897
1898 return blkdev_get(bdev, filp->f_mode, filp);
1899}
1900
1901static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part)
1902{
1903 struct gendisk *disk = bdev->bd_disk;
1904 struct block_device *victim = NULL;
1905
1906 mutex_lock_nested(&bdev->bd_mutex, for_part);
1907 if (for_part)
1908 bdev->bd_part_count--;
1909
1910 if (!--bdev->bd_openers) {
1911 WARN_ON_ONCE(bdev->bd_holders);
1912 sync_blockdev(bdev);
1913 kill_bdev(bdev);
1914
1915 bdev_write_inode(bdev);
1916 }
1917 if (bdev->bd_contains == bdev) {
1918 if (disk->fops->release)
1919 disk->fops->release(disk, mode);
1920 }
1921 if (!bdev->bd_openers) {
1922 disk_put_part(bdev->bd_part);
1923 bdev->bd_part = NULL;
1924 bdev->bd_disk = NULL;
1925 if (bdev != bdev->bd_contains)
1926 victim = bdev->bd_contains;
1927 bdev->bd_contains = NULL;
1928
1929 put_disk_and_module(disk);
1930 }
1931 mutex_unlock(&bdev->bd_mutex);
1932 bdput(bdev);
1933 if (victim)
1934 __blkdev_put(victim, mode, 1);
1935}
1936
1937void blkdev_put(struct block_device *bdev, fmode_t mode)
1938{
1939 mutex_lock(&bdev->bd_mutex);
1940
1941 if (mode & FMODE_EXCL) {
1942 bool bdev_free;
1943
		/*
		 * Release a claim on the device.  The holder fields are
		 * protected with bdev_lock.
		 */
1949 spin_lock(&bdev_lock);
1950
1951 WARN_ON_ONCE(--bdev->bd_holders < 0);
1952 WARN_ON_ONCE(--bdev->bd_contains->bd_holders < 0);
1953
		/* bd_contains might point to self, check in a separate step */
1955 if ((bdev_free = !bdev->bd_holders))
1956 bdev->bd_holder = NULL;
1957 if (!bdev->bd_contains->bd_holders)
1958 bdev->bd_contains->bd_holder = NULL;
1959
1960 spin_unlock(&bdev_lock);
1961
		/*
		 * If this was the last claim, remove holder link and
		 * unblock event polling if it was a write holder.
		 */
1966 if (bdev_free && bdev->bd_write_holder) {
1967 disk_unblock_events(bdev->bd_disk);
1968 bdev->bd_write_holder = false;
1969 }
1970 }
1971
	/*
	 * Trigger event checking and tell drivers to flush MEDIA_CHANGE
	 * event.  This is to ensure detection of media removal commanded
	 * from userland - e.g. eject(1).
	 */
1977 disk_flush_events(bdev->bd_disk, DISK_EVENT_MEDIA_CHANGE);
1978
1979 mutex_unlock(&bdev->bd_mutex);
1980
1981 __blkdev_put(bdev, mode, 0);
1982}
1983EXPORT_SYMBOL(blkdev_put);
1984
1985static int blkdev_close(struct inode * inode, struct file * filp)
1986{
1987 struct block_device *bdev = I_BDEV(bdev_file_inode(filp));
1988 blkdev_put(bdev, filp->f_mode);
1989 return 0;
1990}
1991
1992static long block_ioctl(struct file *file, unsigned cmd, unsigned long arg)
1993{
1994 struct block_device *bdev = I_BDEV(bdev_file_inode(file));
1995 fmode_t mode = file->f_mode;
1996
	/*
	 * O_NDELAY can be altered using fcntl(.., F_SETFL, ..), so we have
	 * to update it before every ioctl.
	 */
2001 if (file->f_flags & O_NDELAY)
2002 mode |= FMODE_NDELAY;
2003 else
2004 mode &= ~FMODE_NDELAY;
2005
2006 return blkdev_ioctl(bdev, mode, cmd, arg);
2007}
2008
2009
/*
 * Write data to the block device.  Only intended for the block device itself
 * and the raw driver which basically is a fake block device.
 *
 * Does not take i_mutex for the write and thus is not for general purpose
 * use.
 */
2016ssize_t blkdev_write_iter(struct kiocb *iocb, struct iov_iter *from)
2017{
2018 struct file *file = iocb->ki_filp;
2019 struct inode *bd_inode = bdev_file_inode(file);
2020 loff_t size = i_size_read(bd_inode);
2021 struct blk_plug plug;
2022 ssize_t ret;
2023
2024 if (bdev_read_only(I_BDEV(bd_inode)))
2025 return -EPERM;
2026
2027 if (!iov_iter_count(from))
2028 return 0;
2029
2030 if (iocb->ki_pos >= size)
2031 return -ENOSPC;
2032
2033 if ((iocb->ki_flags & (IOCB_NOWAIT | IOCB_DIRECT)) == IOCB_NOWAIT)
2034 return -EOPNOTSUPP;
2035
2036 iov_iter_truncate(from, size - iocb->ki_pos);
2037
2038 blk_start_plug(&plug);
2039 ret = __generic_file_write_iter(iocb, from);
2040 if (ret > 0)
2041 ret = generic_write_sync(iocb, ret);
2042 blk_finish_plug(&plug);
2043 return ret;
2044}
2045EXPORT_SYMBOL_GPL(blkdev_write_iter);
2046
2047ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to)
2048{
2049 struct file *file = iocb->ki_filp;
2050 struct inode *bd_inode = bdev_file_inode(file);
2051 loff_t size = i_size_read(bd_inode);
2052 loff_t pos = iocb->ki_pos;
2053
2054 if (pos >= size)
2055 return 0;
2056
2057 size -= pos;
2058 iov_iter_truncate(to, size);
2059 return generic_file_read_iter(iocb, to);
2060}
2061EXPORT_SYMBOL_GPL(blkdev_read_iter);
2062
/*
 * Try to release a page associated with a block device when the system
 * is under memory pressure.
 */
2067static int blkdev_releasepage(struct page *page, gfp_t wait)
2068{
2069 struct super_block *super = BDEV_I(page->mapping->host)->bdev.bd_super;
2070
2071 if (super && super->s_op->bdev_try_to_free_page)
2072 return super->s_op->bdev_try_to_free_page(super, page, wait);
2073
2074 return try_to_free_buffers(page);
2075}
2076
2077static int blkdev_writepages(struct address_space *mapping,
2078 struct writeback_control *wbc)
2079{
2080 return generic_writepages(mapping, wbc);
2081}
2082
2083static const struct address_space_operations def_blk_aops = {
2084 .readpage = blkdev_readpage,
2085 .readpages = blkdev_readpages,
2086 .writepage = blkdev_writepage,
2087 .write_begin = blkdev_write_begin,
2088 .write_end = blkdev_write_end,
2089 .writepages = blkdev_writepages,
2090 .releasepage = blkdev_releasepage,
2091 .direct_IO = blkdev_direct_IO,
2092 .is_dirty_writeback = buffer_check_dirty_writeback,
2093};
2094
2095#define BLKDEV_FALLOC_FL_SUPPORTED \
2096 (FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE | \
2097 FALLOC_FL_ZERO_RANGE | FALLOC_FL_NO_HIDE_STALE)
2098
2099static long blkdev_fallocate(struct file *file, int mode, loff_t start,
2100 loff_t len)
2101{
2102 struct block_device *bdev = I_BDEV(bdev_file_inode(file));
2103 struct address_space *mapping;
2104 loff_t end = start + len - 1;
2105 loff_t isize;
2106 int error;
2107
	/* Fail if we don't recognize the flags. */
2109 if (mode & ~BLKDEV_FALLOC_FL_SUPPORTED)
2110 return -EOPNOTSUPP;
2111
	/* Don't go off the end of the device. */
2113 isize = i_size_read(bdev->bd_inode);
2114 if (start >= isize)
2115 return -EINVAL;
2116 if (end >= isize) {
2117 if (mode & FALLOC_FL_KEEP_SIZE) {
2118 len = isize - start;
2119 end = start + len - 1;
2120 } else
2121 return -EINVAL;
2122 }
2123
	/*
	 * Don't allow IO that isn't aligned to the logical block size.
	 */
2127 if ((start | len) & (bdev_logical_block_size(bdev) - 1))
2128 return -EINVAL;
2129
2130
2131 mapping = bdev->bd_inode->i_mapping;
2132 truncate_inode_pages_range(mapping, start, end);
2133
2134 switch (mode) {
2135 case FALLOC_FL_ZERO_RANGE:
2136 case FALLOC_FL_ZERO_RANGE | FALLOC_FL_KEEP_SIZE:
2137 error = blkdev_issue_zeroout(bdev, start >> 9, len >> 9,
2138 GFP_KERNEL, BLKDEV_ZERO_NOUNMAP);
2139 break;
2140 case FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE:
2141 error = blkdev_issue_zeroout(bdev, start >> 9, len >> 9,
2142 GFP_KERNEL, BLKDEV_ZERO_NOFALLBACK);
2143 break;
2144 case FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE | FALLOC_FL_NO_HIDE_STALE:
2145 error = blkdev_issue_discard(bdev, start >> 9, len >> 9,
2146 GFP_KERNEL, 0);
2147 break;
2148 default:
2149 return -EOPNOTSUPP;
2150 }
2151 if (error)
2152 return error;
2153
	/*
	 * Invalidate again; if someone wandered in and dirtied a page,
	 * the caller will be given -EBUSY.  The third argument is
	 * inclusive, so the rounding here is safe.
	 */
2159 return invalidate_inode_pages2_range(mapping,
2160 start >> PAGE_SHIFT,
2161 end >> PAGE_SHIFT);
2162}
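/*
 * Illustrative mapping of the supported userspace fallocate(2) mode
 * combinations to the block-layer helpers used in the switch above (this is
 * a summary, not an additional API):
 *
 *	FALLOC_FL_ZERO_RANGE [| FALLOC_FL_KEEP_SIZE]
 *		-> blkdev_issue_zeroout(..., BLKDEV_ZERO_NOUNMAP)
 *	FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE
 *		-> blkdev_issue_zeroout(..., BLKDEV_ZERO_NOFALLBACK)
 *	FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE | FALLOC_FL_NO_HIDE_STALE
 *		-> blkdev_issue_discard(...)
 */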
2163
2164const struct file_operations def_blk_fops = {
2165 .open = blkdev_open,
2166 .release = blkdev_close,
2167 .llseek = block_llseek,
2168 .read_iter = blkdev_read_iter,
2169 .write_iter = blkdev_write_iter,
2170 .iopoll = blkdev_iopoll,
2171 .mmap = generic_file_mmap,
2172 .fsync = blkdev_fsync,
2173 .unlocked_ioctl = block_ioctl,
2174#ifdef CONFIG_COMPAT
2175 .compat_ioctl = compat_blkdev_ioctl,
2176#endif
2177 .splice_read = generic_file_splice_read,
2178 .splice_write = iter_file_splice_write,
2179 .fallocate = blkdev_fallocate,
2180};
2181
2182int ioctl_by_bdev(struct block_device *bdev, unsigned cmd, unsigned long arg)
2183{
2184 int res;
2185 mm_segment_t old_fs = get_fs();
2186 set_fs(KERNEL_DS);
2187 res = blkdev_ioctl(bdev, 0, cmd, arg);
2188 set_fs(old_fs);
2189 return res;
2190}
2191
2192EXPORT_SYMBOL(ioctl_by_bdev);
2193
/**
 * lookup_bdev  - lookup a struct block_device by name
 * @pathname:	special file representing the block device
 *
 * Get a reference to the blockdevice at @pathname in the current
 * namespace if possible and return it.  Return ERR_PTR(error)
 * otherwise.
 */
2202struct block_device *lookup_bdev(const char *pathname)
2203{
2204 struct block_device *bdev;
2205 struct inode *inode;
2206 struct path path;
2207 int error;
2208
2209 if (!pathname || !*pathname)
2210 return ERR_PTR(-EINVAL);
2211
2212 error = kern_path(pathname, LOOKUP_FOLLOW, &path);
2213 if (error)
2214 return ERR_PTR(error);
2215
2216 inode = d_backing_inode(path.dentry);
2217 error = -ENOTBLK;
2218 if (!S_ISBLK(inode->i_mode))
2219 goto fail;
2220 error = -EACCES;
2221 if (!may_open_dev(&path))
2222 goto fail;
2223 error = -ENOMEM;
2224 bdev = bd_acquire(inode);
2225 if (!bdev)
2226 goto fail;
2227out:
2228 path_put(&path);
2229 return bdev;
2230fail:
2231 bdev = ERR_PTR(error);
2232 goto out;
2233}
2234EXPORT_SYMBOL(lookup_bdev);
2235
2236int __invalidate_device(struct block_device *bdev, bool kill_dirty)
2237{
2238 struct super_block *sb = get_super(bdev);
2239 int res = 0;
2240
2241 if (sb) {
		/*
		 * no need to lock the super, get_super holds the
		 * read mutex so the filesystem cannot go away
		 * under us (->put_super runs with the write lock
		 * held).
		 */
2248 shrink_dcache_sb(sb);
2249 res = invalidate_inodes(sb, kill_dirty);
2250 drop_super(sb);
2251 }
2252 invalidate_bdev(bdev);
2253 return res;
2254}
2255EXPORT_SYMBOL(__invalidate_device);
2256
2257void iterate_bdevs(void (*func)(struct block_device *, void *), void *arg)
2258{
2259 struct inode *inode, *old_inode = NULL;
2260
2261 spin_lock(&blockdev_superblock->s_inode_list_lock);
2262 list_for_each_entry(inode, &blockdev_superblock->s_inodes, i_sb_list) {
2263 struct address_space *mapping = inode->i_mapping;
2264 struct block_device *bdev;
2265
2266 spin_lock(&inode->i_lock);
2267 if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW) ||
2268 mapping->nrpages == 0) {
2269 spin_unlock(&inode->i_lock);
2270 continue;
2271 }
2272 __iget(inode);
2273 spin_unlock(&inode->i_lock);
2274 spin_unlock(&blockdev_superblock->s_inode_list_lock);
2275
		/*
		 * We hold a reference to 'inode' so it couldn't have been
		 * removed from the s_inodes list while we dropped the
		 * s_inode_list_lock.  We cannot iput the inode now as we can
		 * be holding the last reference and we cannot iput it under
		 * s_inode_list_lock.  So we keep the reference and iput it
		 * later.
		 */
2283 iput(old_inode);
2284 old_inode = inode;
2285 bdev = I_BDEV(inode);
2286
2287 mutex_lock(&bdev->bd_mutex);
2288 if (bdev->bd_openers)
2289 func(bdev, arg);
2290 mutex_unlock(&bdev->bd_mutex);
2291
2292 spin_lock(&blockdev_superblock->s_inode_list_lock);
2293 }
2294 spin_unlock(&blockdev_superblock->s_inode_list_lock);
2295 iput(old_inode);
2296}
2297