/*
 *  linux/fs/block_dev.c
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 *  Copyright (C) 2001  Andrea Arcangeli <andrea@suse.de> SuSE
 */

#include <linux/init.h>
#include <linux/mm.h>
#include <linux/fcntl.h>
#include <linux/slab.h>
#include <linux/kmod.h>
#include <linux/major.h>
#include <linux/device_cgroup.h>
#include <linux/highmem.h>
#include <linux/blkdev.h>
#include <linux/backing-dev.h>
#include <linux/module.h>
#include <linux/blkpg.h>
#include <linux/magic.h>
#include <linux/dax.h>
#include <linux/buffer_head.h>
#include <linux/swap.h>
#include <linux/pagevec.h>
#include <linux/writeback.h>
#include <linux/mpage.h>
#include <linux/mount.h>
#include <linux/uio.h>
#include <linux/namei.h>
#include <linux/log2.h>
#include <linux/cleancache.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/falloc.h>
#include <linux/uaccess.h>
#include "internal.h"

struct bdev_inode {
	struct block_device bdev;
	struct inode vfs_inode;
};

static const struct address_space_operations def_blk_aops;

static inline struct bdev_inode *BDEV_I(struct inode *inode)
{
	return container_of(inode, struct bdev_inode, vfs_inode);
}

struct block_device *I_BDEV(struct inode *inode)
{
	return &BDEV_I(inode)->bdev;
}
EXPORT_SYMBOL(I_BDEV);

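/*
 * Write the block device's backing inode out right now and warn (rate
 * limited) if writeback keeps failing; looping while I_DIRTY is set makes
 * sure we do not drop dirty state that appeared while a write was in flight.
 */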
static void bdev_write_inode(struct block_device *bdev)
{
	struct inode *inode = bdev->bd_inode;
	int ret;

	spin_lock(&inode->i_lock);
	while (inode->i_state & I_DIRTY) {
		spin_unlock(&inode->i_lock);
		ret = write_inode_now(inode, true);
		if (ret) {
			char name[BDEVNAME_SIZE];
			pr_warn_ratelimited("VFS: Dirty inode writeback failed "
					    "for block device %s (err=%d).\n",
					    bdevname(bdev, name), ret);
		}
		spin_lock(&inode->i_lock);
	}
	spin_unlock(&inode->i_lock);
}

/* Kill _all_ buffers and pagecache, dirty or not.. */
void kill_bdev(struct block_device *bdev)
{
	struct address_space *mapping = bdev->bd_inode->i_mapping;

	if (mapping->nrpages == 0 && mapping->nrexceptional == 0)
		return;

	invalidate_bh_lrus();
	truncate_inode_pages(mapping, 0);
}
EXPORT_SYMBOL(kill_bdev);

/* Invalidate clean unused buffers and pagecache. */
void invalidate_bdev(struct block_device *bdev)
{
	struct address_space *mapping = bdev->bd_inode->i_mapping;

	if (mapping->nrpages) {
		invalidate_bh_lrus();
		lru_add_drain_all();	/* make sure all lru add caches are flushed */
		invalidate_mapping_pages(mapping, 0, -1);
	}
	/*
	 * Flushing the cleancache is rarely needed for a bdev, but be
	 * cautious for the strange corner cases.
	 */
	cleancache_invalidate_inode(mapping);
}
EXPORT_SYMBOL(invalidate_bdev);

/*
 * Pick the largest power-of-two block size (up to PAGE_SIZE) that still
 * divides the device size evenly and use it as the initial block size.
 */
static void set_init_blocksize(struct block_device *bdev)
{
	unsigned bsize = bdev_logical_block_size(bdev);
	loff_t size = i_size_read(bdev->bd_inode);

	while (bsize < PAGE_SIZE) {
		if (size & bsize)
			break;
		bsize <<= 1;
	}
	bdev->bd_block_size = bsize;
	bdev->bd_inode->i_blkbits = blksize_bits(bsize);
}

int set_blocksize(struct block_device *bdev, int size)
{
	/* Size must be a power of two, and between 512 and PAGE_SIZE */
	if (size > PAGE_SIZE || size < 512 || !is_power_of_2(size))
		return -EINVAL;

	/* Size cannot be smaller than the size supported by the device */
	if (size < bdev_logical_block_size(bdev))
		return -EINVAL;

	/* Don't change the size if it is same as current */
	if (bdev->bd_block_size != size) {
		sync_blockdev(bdev);
		bdev->bd_block_size = size;
		bdev->bd_inode->i_blkbits = blksize_bits(size);
		kill_bdev(bdev);
	}
	return 0;
}

EXPORT_SYMBOL(set_blocksize);

int sb_set_blocksize(struct super_block *sb, int size)
{
	if (set_blocksize(sb->s_bdev, size))
		return 0;
	/* If we get here, we know size is a power of two
	 * and its value is between 512 and PAGE_SIZE */
	sb->s_blocksize = size;
	sb->s_blocksize_bits = blksize_bits(size);
	return sb->s_blocksize;
}

EXPORT_SYMBOL(sb_set_blocksize);

int sb_min_blocksize(struct super_block *sb, int size)
{
	int minsize = bdev_logical_block_size(sb->s_bdev);
	if (size < minsize)
		size = minsize;
	return sb_set_blocksize(sb, size);
}

EXPORT_SYMBOL(sb_min_blocksize);

static int
blkdev_get_block(struct inode *inode, sector_t iblock,
		struct buffer_head *bh, int create)
{
	bh->b_bdev = I_BDEV(inode);
	bh->b_blocknr = iblock;
	set_buffer_mapped(bh);
	return 0;
}

static struct inode *bdev_file_inode(struct file *file)
{
	return file->f_mapping->host;
}

static unsigned int dio_bio_write_op(struct kiocb *iocb)
{
	unsigned int op = REQ_OP_WRITE | REQ_SYNC | REQ_IDLE;

	/* avoid the need for a I/O completion work item */
	if (iocb->ki_flags & IOCB_DSYNC)
		op |= REQ_FUA;
	return op;
}

#define DIO_INLINE_BIO_VECS 4

static void blkdev_bio_end_io_simple(struct bio *bio)
{
	struct task_struct *waiter = bio->bi_private;

	WRITE_ONCE(bio->bi_private, NULL);
	blk_wake_io_task(waiter);
}
199
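/*
 * Fast path for small synchronous direct I/O: build a single bio on the
 * stack (inline bio_vecs for up to DIO_INLINE_BIO_VECS pages, a kmalloc'd
 * array otherwise), submit it, and wait or poll for completion in place.
 */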
200static ssize_t
201__blkdev_direct_IO_simple(struct kiocb *iocb, struct iov_iter *iter,
202 int nr_pages)
203{
204 struct file *file = iocb->ki_filp;
205 struct block_device *bdev = I_BDEV(bdev_file_inode(file));
206 struct bio_vec inline_vecs[DIO_INLINE_BIO_VECS], *vecs, *bvec;
207 loff_t pos = iocb->ki_pos;
208 bool should_dirty = false;
209 struct bio bio;
210 ssize_t ret;
211 blk_qc_t qc;
212 struct bvec_iter_all iter_all;
213
214 if ((pos | iov_iter_alignment(iter)) &
215 (bdev_logical_block_size(bdev) - 1))
216 return -EINVAL;
217
218 if (nr_pages <= DIO_INLINE_BIO_VECS)
219 vecs = inline_vecs;
220 else {
221 vecs = kmalloc_array(nr_pages, sizeof(struct bio_vec),
222 GFP_KERNEL);
223 if (!vecs)
224 return -ENOMEM;
225 }
226
227 bio_init(&bio, vecs, nr_pages);
228 bio_set_dev(&bio, bdev);
229 bio.bi_iter.bi_sector = pos >> 9;
230 bio.bi_write_hint = iocb->ki_hint;
231 bio.bi_private = current;
232 bio.bi_end_io = blkdev_bio_end_io_simple;
233 bio.bi_ioprio = iocb->ki_ioprio;
234
235 ret = bio_iov_iter_get_pages(&bio, iter);
236 if (unlikely(ret))
237 goto out;
238 ret = bio.bi_iter.bi_size;
239
240 if (iov_iter_rw(iter) == READ) {
241 bio.bi_opf = REQ_OP_READ;
242 if (iter_is_iovec(iter))
243 should_dirty = true;
244 } else {
245 bio.bi_opf = dio_bio_write_op(iocb);
246 task_io_account_write(ret);
247 }
248 if (iocb->ki_flags & IOCB_HIPRI)
249 bio_set_polled(&bio, iocb);
250
251 qc = submit_bio(&bio);
252 for (;;) {
253 set_current_state(TASK_UNINTERRUPTIBLE);
254 if (!READ_ONCE(bio.bi_private))
255 break;
256 if (!(iocb->ki_flags & IOCB_HIPRI) ||
257 !blk_poll(bdev_get_queue(bdev), qc, true))
258 io_schedule();
259 }
260 __set_current_state(TASK_RUNNING);
261
262 bio_for_each_segment_all(bvec, &bio, iter_all) {
263 if (should_dirty && !PageCompound(bvec->bv_page))
264 set_page_dirty_lock(bvec->bv_page);
265 if (!bio_flagged(&bio, BIO_NO_PAGE_REF))
266 put_page(bvec->bv_page);
267 }
268
269 if (unlikely(bio.bi_status))
270 ret = blk_status_to_errno(bio.bi_status);
271
272out:
273 if (vecs != inline_vecs)
274 kfree(vecs);
275
276 bio_uninit(&bio);
277
278 return ret;
279}
280
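/*
 * Per-call state for the multi-bio direct I/O path.  The first bio is
 * embedded so it can be allocated from blkdev_dio_pool together with the
 * dio itself; @ref counts outstanding bios once multi_bio is set.
 */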
281struct blkdev_dio {
282 union {
283 struct kiocb *iocb;
284 struct task_struct *waiter;
285 };
286 size_t size;
287 atomic_t ref;
288 bool multi_bio : 1;
289 bool should_dirty : 1;
290 bool is_sync : 1;
291 struct bio bio;
292};
293
294static struct bio_set blkdev_dio_pool;
295
296static int blkdev_iopoll(struct kiocb *kiocb, bool wait)
297{
298 struct block_device *bdev = I_BDEV(kiocb->ki_filp->f_mapping->host);
299 struct request_queue *q = bdev_get_queue(bdev);
300
301 return blk_poll(q, READ_ONCE(kiocb->ki_cookie), wait);
302}
303
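/*
 * Completion for the multi-bio path: remember the first error in the
 * parent dio and, when the last bio finishes, either complete the iocb
 * (async) or wake the waiting task (sync).  Page references are dropped
 * here unless the bio carries BIO_NO_PAGE_REF.
 */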
304static void blkdev_bio_end_io(struct bio *bio)
305{
306 struct blkdev_dio *dio = bio->bi_private;
307 bool should_dirty = dio->should_dirty;
308
309 if (bio->bi_status && !dio->bio.bi_status)
310 dio->bio.bi_status = bio->bi_status;
311
312 if (!dio->multi_bio || atomic_dec_and_test(&dio->ref)) {
313 if (!dio->is_sync) {
314 struct kiocb *iocb = dio->iocb;
315 ssize_t ret;
316
317 if (likely(!dio->bio.bi_status)) {
318 ret = dio->size;
319 iocb->ki_pos += ret;
320 } else {
321 ret = blk_status_to_errno(dio->bio.bi_status);
322 }
323
324 dio->iocb->ki_complete(iocb, ret, 0);
325 if (dio->multi_bio)
326 bio_put(&dio->bio);
327 } else {
328 struct task_struct *waiter = dio->waiter;
329
330 WRITE_ONCE(dio->waiter, NULL);
331 blk_wake_io_task(waiter);
332 }
333 }
334
335 if (should_dirty) {
336 bio_check_pages_dirty(bio);
337 } else {
338 if (!bio_flagged(bio, BIO_NO_PAGE_REF)) {
339 struct bvec_iter_all iter_all;
340 struct bio_vec *bvec;
341
342 bio_for_each_segment_all(bvec, bio, iter_all)
343 put_page(bvec->bv_page);
344 }
345 bio_put(bio);
346 }
347}
348
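/*
 * General direct I/O path: carve the iov_iter into as many bios as needed,
 * tracking them with dio->ref.  Submissions are plugged unless the iocb is
 * polled (IOCB_HIPRI); synchronous callers wait (or poll) for the final
 * completion at the end of this function.
 */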
349static ssize_t
350__blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages)
351{
352 struct file *file = iocb->ki_filp;
353 struct inode *inode = bdev_file_inode(file);
354 struct block_device *bdev = I_BDEV(inode);
355 struct blk_plug plug;
356 struct blkdev_dio *dio;
357 struct bio *bio;
358 bool is_poll = (iocb->ki_flags & IOCB_HIPRI) != 0;
359 bool is_read = (iov_iter_rw(iter) == READ), is_sync;
360 loff_t pos = iocb->ki_pos;
361 blk_qc_t qc = BLK_QC_T_NONE;
362 int ret = 0;
363
364 if ((pos | iov_iter_alignment(iter)) &
365 (bdev_logical_block_size(bdev) - 1))
366 return -EINVAL;
367
368 bio = bio_alloc_bioset(GFP_KERNEL, nr_pages, &blkdev_dio_pool);
369
370 dio = container_of(bio, struct blkdev_dio, bio);
371 dio->is_sync = is_sync = is_sync_kiocb(iocb);
372 if (dio->is_sync) {
373 dio->waiter = current;
374 bio_get(bio);
375 } else {
376 dio->iocb = iocb;
377 }
378
379 dio->size = 0;
380 dio->multi_bio = false;
381 dio->should_dirty = is_read && iter_is_iovec(iter);
382
383
384
385
386
387 if (!is_poll)
388 blk_start_plug(&plug);
389
390 for (;;) {
391 bio_set_dev(bio, bdev);
392 bio->bi_iter.bi_sector = pos >> 9;
393 bio->bi_write_hint = iocb->ki_hint;
394 bio->bi_private = dio;
395 bio->bi_end_io = blkdev_bio_end_io;
396 bio->bi_ioprio = iocb->ki_ioprio;
397
398 ret = bio_iov_iter_get_pages(bio, iter);
399 if (unlikely(ret)) {
400 bio->bi_status = BLK_STS_IOERR;
401 bio_endio(bio);
402 break;
403 }
404
405 if (is_read) {
406 bio->bi_opf = REQ_OP_READ;
407 if (dio->should_dirty)
408 bio_set_pages_dirty(bio);
409 } else {
410 bio->bi_opf = dio_bio_write_op(iocb);
411 task_io_account_write(bio->bi_iter.bi_size);
412 }
413
414 dio->size += bio->bi_iter.bi_size;
415 pos += bio->bi_iter.bi_size;
416
417 nr_pages = iov_iter_npages(iter, BIO_MAX_PAGES);
418 if (!nr_pages) {
419 bool polled = false;
420
421 if (iocb->ki_flags & IOCB_HIPRI) {
422 bio_set_polled(bio, iocb);
423 polled = true;
424 }
425
426 qc = submit_bio(bio);
427
428 if (polled)
429 WRITE_ONCE(iocb->ki_cookie, qc);
430 break;
431 }
432
433 if (!dio->multi_bio) {
434
435
436
437
438
439 if (!is_sync)
440 bio_get(bio);
441 dio->multi_bio = true;
442 atomic_set(&dio->ref, 2);
443 } else {
444 atomic_inc(&dio->ref);
445 }
446
447 submit_bio(bio);
448 bio = bio_alloc(GFP_KERNEL, nr_pages);
449 }
450
451 if (!is_poll)
452 blk_finish_plug(&plug);
453
454 if (!is_sync)
455 return -EIOCBQUEUED;
456
457 for (;;) {
458 set_current_state(TASK_UNINTERRUPTIBLE);
459 if (!READ_ONCE(dio->waiter))
460 break;
461
462 if (!(iocb->ki_flags & IOCB_HIPRI) ||
463 !blk_poll(bdev_get_queue(bdev), qc, true))
464 io_schedule();
465 }
466 __set_current_state(TASK_RUNNING);
467
468 if (!ret)
469 ret = blk_status_to_errno(dio->bio.bi_status);
470 if (likely(!ret))
471 ret = dio->size;
472
473 bio_put(&dio->bio);
474 return ret;
475}

static ssize_t
blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
{
	int nr_pages;

	nr_pages = iov_iter_npages(iter, BIO_MAX_PAGES + 1);
	if (!nr_pages)
		return 0;
	if (is_sync_kiocb(iocb) && nr_pages <= BIO_MAX_PAGES)
		return __blkdev_direct_IO_simple(iocb, iter, nr_pages);

	return __blkdev_direct_IO(iocb, iter, min(nr_pages, BIO_MAX_PAGES));
}

static __init int blkdev_init(void)
{
	return bioset_init(&blkdev_dio_pool, 4,
				offsetof(struct blkdev_dio, bio),
				BIOSET_NEED_BVECS);
}
module_init(blkdev_init);

int __sync_blockdev(struct block_device *bdev, int wait)
{
	if (!bdev)
		return 0;
	if (!wait)
		return filemap_flush(bdev->bd_inode->i_mapping);
	return filemap_write_and_wait(bdev->bd_inode->i_mapping);
}

/*
 * Write out and wait upon all the dirty data associated with a block
 * device via its mapping.  Does not take the superblock lock.
 */
int sync_blockdev(struct block_device *bdev)
{
	return __sync_blockdev(bdev, 1);
}
EXPORT_SYMBOL(sync_blockdev);

/*
 * Write out and wait upon all dirty data associated with this
 * device.  Filesystem data as well as the underlying block
 * device.  Takes the superblock lock.
 */
int fsync_bdev(struct block_device *bdev)
{
	struct super_block *sb = get_super(bdev);
	if (sb) {
		int res = sync_filesystem(sb);
		drop_super(sb);
		return res;
	}
	return sync_blockdev(bdev);
}
EXPORT_SYMBOL(fsync_bdev);

/**
 * freeze_bdev  --  lock a filesystem and force it into a consistent state
 * @bdev:	blockdevice to lock
 *
 * If a superblock is found on this device, we take the s_umount semaphore
 * on it to make sure nobody unmounts until the snapshot creation is done.
 * The reference counter (bd_fsfreeze_count) guarantees that only the last
 * unfreeze process can unfreeze the frozen filesystem when multiple freeze
 * requests arrive simultaneously.  It counts up in freeze_bdev() and counts
 * down in thaw_bdev(); only when it drops to 0 does thaw_bdev() actually
 * unfreeze.
 */
545struct super_block *freeze_bdev(struct block_device *bdev)
546{
547 struct super_block *sb;
548 int error = 0;
549
550 mutex_lock(&bdev->bd_fsfreeze_mutex);
551 if (++bdev->bd_fsfreeze_count > 1) {
552
553
554
555
556
557 sb = get_super(bdev);
558 if (sb)
559 drop_super(sb);
560 mutex_unlock(&bdev->bd_fsfreeze_mutex);
561 return sb;
562 }
563
564 sb = get_active_super(bdev);
565 if (!sb)
566 goto out;
567 if (sb->s_op->freeze_super)
568 error = sb->s_op->freeze_super(sb);
569 else
570 error = freeze_super(sb);
571 if (error) {
572 deactivate_super(sb);
573 bdev->bd_fsfreeze_count--;
574 mutex_unlock(&bdev->bd_fsfreeze_mutex);
575 return ERR_PTR(error);
576 }
577 deactivate_super(sb);
578 out:
579 sync_blockdev(bdev);
580 mutex_unlock(&bdev->bd_fsfreeze_mutex);
581 return sb;
582}
583EXPORT_SYMBOL(freeze_bdev);

/**
 * thaw_bdev  -- unlock a filesystem frozen by freeze_bdev()
 * @bdev:	blockdevice to unlock
 * @sb:		associated superblock, may be NULL
 *
 * Unlocks the filesystem and marks it writeable again once the freeze
 * count drops to zero.
 */
592int thaw_bdev(struct block_device *bdev, struct super_block *sb)
593{
594 int error = -EINVAL;
595
596 mutex_lock(&bdev->bd_fsfreeze_mutex);
597 if (!bdev->bd_fsfreeze_count)
598 goto out;
599
600 error = 0;
601 if (--bdev->bd_fsfreeze_count > 0)
602 goto out;
603
604 if (!sb)
605 goto out;
606
607 if (sb->s_op->thaw_super)
608 error = sb->s_op->thaw_super(sb);
609 else
610 error = thaw_super(sb);
611 if (error)
612 bdev->bd_fsfreeze_count++;
613out:
614 mutex_unlock(&bdev->bd_fsfreeze_mutex);
615 return error;
616}
617EXPORT_SYMBOL(thaw_bdev);
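/*
 * Illustrative pairing (not part of this file): a snapshot-style caller
 * typically brackets its work with freeze_bdev()/thaw_bdev():
 *
 *	struct super_block *sb = freeze_bdev(bdev);
 *	if (IS_ERR(sb))
 *		return PTR_ERR(sb);
 *	... take the snapshot ...
 *	thaw_bdev(bdev, sb);
 *
 * freeze_bdev() may return NULL when no filesystem is mounted on @bdev;
 * thaw_bdev() copes with a NULL @sb by only dropping the freeze count.
 */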
618
619static int blkdev_writepage(struct page *page, struct writeback_control *wbc)
620{
621 return block_write_full_page(page, blkdev_get_block, wbc);
622}
623
624static int blkdev_readpage(struct file * file, struct page * page)
625{
626 return block_read_full_page(page, blkdev_get_block);
627}
628
629static int blkdev_readpages(struct file *file, struct address_space *mapping,
630 struct list_head *pages, unsigned nr_pages)
631{
632 return mpage_readpages(mapping, pages, nr_pages, blkdev_get_block);
633}
634
635static int blkdev_write_begin(struct file *file, struct address_space *mapping,
636 loff_t pos, unsigned len, unsigned flags,
637 struct page **pagep, void **fsdata)
638{
639 return block_write_begin(mapping, pos, len, flags, pagep,
640 blkdev_get_block);
641}
642
643static int blkdev_write_end(struct file *file, struct address_space *mapping,
644 loff_t pos, unsigned len, unsigned copied,
645 struct page *page, void *fsdata)
646{
647 int ret;
648 ret = block_write_end(file, mapping, pos, len, copied, page, fsdata);
649
650 unlock_page(page);
651 put_page(page);
652
653 return ret;
654}
655
656
657
658
659
660
661static loff_t block_llseek(struct file *file, loff_t offset, int whence)
662{
663 struct inode *bd_inode = bdev_file_inode(file);
664 loff_t retval;
665
666 inode_lock(bd_inode);
667 retval = fixed_size_llseek(file, offset, whence, i_size_read(bd_inode));
668 inode_unlock(bd_inode);
669 return retval;
670}
671
672int blkdev_fsync(struct file *filp, loff_t start, loff_t end, int datasync)
673{
674 struct inode *bd_inode = bdev_file_inode(filp);
675 struct block_device *bdev = I_BDEV(bd_inode);
676 int error;
677
678 error = file_write_and_wait_range(filp, start, end);
679 if (error)
680 return error;
681
682
683
684
685
686
687 error = blkdev_issue_flush(bdev, GFP_KERNEL, NULL);
688 if (error == -EOPNOTSUPP)
689 error = 0;
690
691 return error;
692}
693EXPORT_SYMBOL(blkdev_fsync);

/**
 * bdev_read_page() - Start reading a page from a block device
 * @bdev: The device to read the page from
 * @sector: The offset on the device to read the page to (need not be aligned)
 * @page: The page to read
 *
 * On entry, the page should be locked.  It will be unlocked when the page
 * has been read.  If the block driver implements rw_page synchronously,
 * that will be true on exit from this function, but it need not be.
 *
 * Errors returned by this function are usually "soft", eg out of memory, or
 * queue full; callers should try a different route to read this page rather
 * than propagate an error back up the stack.
 *
 * Return: negative errno if an error occurs, 0 if submission was successful.
 */
711int bdev_read_page(struct block_device *bdev, sector_t sector,
712 struct page *page)
713{
714 const struct block_device_operations *ops = bdev->bd_disk->fops;
715 int result = -EOPNOTSUPP;
716
717 if (!ops->rw_page || bdev_get_integrity(bdev))
718 return result;
719
720 result = blk_queue_enter(bdev->bd_queue, 0);
721 if (result)
722 return result;
723 result = ops->rw_page(bdev, sector + get_start_sect(bdev), page,
724 REQ_OP_READ);
725 blk_queue_exit(bdev->bd_queue);
726 return result;
727}
728EXPORT_SYMBOL_GPL(bdev_read_page);

/**
 * bdev_write_page() - Start writing a page to a block device
 * @bdev: The device to write the page to
 * @sector: The offset on the device to write the page to (need not be aligned)
 * @page: The page to write
 * @wbc: The writeback_control for the write
 *
 * On entry, the page should be locked and not currently under writeback.
 * On exit, if the write started successfully, the page will be unlocked and
 * under writeback.  If the write failed already (eg the driver failed to
 * queue the page to the device), the page will still be locked.  If the
 * caller is a ->writepage implementation, it will need to unlock the page.
 *
 * Errors returned by this function are usually "soft", eg out of memory, or
 * queue full; callers should try a different route to write this page rather
 * than propagate an error back up the stack.
 *
 * Return: negative errno if an error occurs, 0 if submission was successful.
 */
749int bdev_write_page(struct block_device *bdev, sector_t sector,
750 struct page *page, struct writeback_control *wbc)
751{
752 int result;
753 const struct block_device_operations *ops = bdev->bd_disk->fops;
754
755 if (!ops->rw_page || bdev_get_integrity(bdev))
756 return -EOPNOTSUPP;
757 result = blk_queue_enter(bdev->bd_queue, 0);
758 if (result)
759 return result;
760
761 set_page_writeback(page);
762 result = ops->rw_page(bdev, sector + get_start_sect(bdev), page,
763 REQ_OP_WRITE);
764 if (result) {
765 end_page_writeback(page);
766 } else {
767 clean_page_buffers(page);
768 unlock_page(page);
769 }
770 blk_queue_exit(bdev->bd_queue);
771 return result;
772}
773EXPORT_SYMBOL_GPL(bdev_write_page);

/*
 * The block-device pseudo-filesystem: every struct block_device is
 * embedded in a bdev_inode that lives on this internal super block.
 */
779static __cacheline_aligned_in_smp DEFINE_SPINLOCK(bdev_lock);
780static struct kmem_cache * bdev_cachep __read_mostly;
781
782static struct inode *bdev_alloc_inode(struct super_block *sb)
783{
784 struct bdev_inode *ei = kmem_cache_alloc(bdev_cachep, GFP_KERNEL);
785 if (!ei)
786 return NULL;
787 return &ei->vfs_inode;
788}
789
790static void bdev_free_inode(struct inode *inode)
791{
792 kmem_cache_free(bdev_cachep, BDEV_I(inode));
793}
794
795static void init_once(void *foo)
796{
797 struct bdev_inode *ei = (struct bdev_inode *) foo;
798 struct block_device *bdev = &ei->bdev;
799
800 memset(bdev, 0, sizeof(*bdev));
801 mutex_init(&bdev->bd_mutex);
802 INIT_LIST_HEAD(&bdev->bd_list);
803#ifdef CONFIG_SYSFS
804 INIT_LIST_HEAD(&bdev->bd_holder_disks);
805#endif
806 bdev->bd_bdi = &noop_backing_dev_info;
807 inode_init_once(&ei->vfs_inode);
808
809 mutex_init(&bdev->bd_fsfreeze_mutex);
810}
811
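/*
 * Tear down a bdev inode: drop its page cache and buffers, unlink it from
 * all_bdevs, detach cgroup writeback state and release any bdi reference
 * taken at open time.
 */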
812static void bdev_evict_inode(struct inode *inode)
813{
814 struct block_device *bdev = &BDEV_I(inode)->bdev;
815 truncate_inode_pages_final(&inode->i_data);
816 invalidate_inode_buffers(inode);
817 clear_inode(inode);
818 spin_lock(&bdev_lock);
819 list_del_init(&bdev->bd_list);
820 spin_unlock(&bdev_lock);
821
822 inode_detach_wb(inode);
823 if (bdev->bd_bdi != &noop_backing_dev_info) {
824 bdi_put(bdev->bd_bdi);
825 bdev->bd_bdi = &noop_backing_dev_info;
826 }
827}
828
829static const struct super_operations bdev_sops = {
830 .statfs = simple_statfs,
831 .alloc_inode = bdev_alloc_inode,
832 .free_inode = bdev_free_inode,
833 .drop_inode = generic_delete_inode,
834 .evict_inode = bdev_evict_inode,
835};
836
837static struct dentry *bd_mount(struct file_system_type *fs_type,
838 int flags, const char *dev_name, void *data)
839{
840 struct dentry *dent;
841 dent = mount_pseudo(fs_type, "bdev:", &bdev_sops, NULL, BDEVFS_MAGIC);
842 if (!IS_ERR(dent))
843 dent->d_sb->s_iflags |= SB_I_CGROUPWB;
844 return dent;
845}
846
847static struct file_system_type bd_type = {
848 .name = "bdev",
849 .mount = bd_mount,
850 .kill_sb = kill_anon_super,
851};
852
853struct super_block *blockdev_superblock __read_mostly;
854EXPORT_SYMBOL_GPL(blockdev_superblock);
855
856void __init bdev_cache_init(void)
857{
858 int err;
859 static struct vfsmount *bd_mnt;
860
861 bdev_cachep = kmem_cache_create("bdev_cache", sizeof(struct bdev_inode),
862 0, (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|
863 SLAB_MEM_SPREAD|SLAB_ACCOUNT|SLAB_PANIC),
864 init_once);
865 err = register_filesystem(&bd_type);
866 if (err)
867 panic("Cannot register bdev pseudo-fs");
868 bd_mnt = kern_mount(&bd_type);
869 if (IS_ERR(bd_mnt))
870 panic("Cannot create bdev pseudo-fs");
871 blockdev_superblock = bd_mnt->mnt_sb;
872}

/*
 * Hash for the bdev inode cache.  Deliberately simple: MAJOR+MINOR is
 * good enough for typical /dev sizes and is hashed again by the icache.
 */
879static inline unsigned long hash(dev_t dev)
880{
881 return MAJOR(dev)+MINOR(dev);
882}
883
884static int bdev_test(struct inode *inode, void *data)
885{
886 return BDEV_I(inode)->bdev.bd_dev == *(dev_t *)data;
887}
888
889static int bdev_set(struct inode *inode, void *data)
890{
891 BDEV_I(inode)->bdev.bd_dev = *(dev_t *)data;
892 return 0;
893}
894
895static LIST_HEAD(all_bdevs);

/*
 * If there is a bdev inode for this device, unhash it so that it gets
 * evicted as soon as the last reference to it is dropped.
 */
901void bdev_unhash_inode(dev_t dev)
902{
903 struct inode *inode;
904
905 inode = ilookup5(blockdev_superblock, hash(dev), bdev_test, &dev);
906 if (inode) {
907 remove_inode_hash(inode);
908 iput(inode);
909 }
910}
911
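/*
 * bdget - find or create the block_device for @dev.  The backing inode
 * lives on the bdev pseudo-fs; a freshly created one is initialised here
 * and added to the all_bdevs list.
 */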
912struct block_device *bdget(dev_t dev)
913{
914 struct block_device *bdev;
915 struct inode *inode;
916
917 inode = iget5_locked(blockdev_superblock, hash(dev),
918 bdev_test, bdev_set, &dev);
919
920 if (!inode)
921 return NULL;
922
923 bdev = &BDEV_I(inode)->bdev;
924
925 if (inode->i_state & I_NEW) {
926 bdev->bd_contains = NULL;
927 bdev->bd_super = NULL;
928 bdev->bd_inode = inode;
929 bdev->bd_block_size = i_blocksize(inode);
930 bdev->bd_part_count = 0;
931 bdev->bd_invalidated = 0;
932 inode->i_mode = S_IFBLK;
933 inode->i_rdev = dev;
934 inode->i_bdev = bdev;
935 inode->i_data.a_ops = &def_blk_aops;
936 mapping_set_gfp_mask(&inode->i_data, GFP_USER);
937 spin_lock(&bdev_lock);
938 list_add(&bdev->bd_list, &all_bdevs);
939 spin_unlock(&bdev_lock);
940 unlock_new_inode(inode);
941 }
942 return bdev;
943}
944
945EXPORT_SYMBOL(bdget);

/**
 * bdgrab -- Grab a reference to an already referenced block device
 * @bdev:	Block device to grab a reference to.
 */
951struct block_device *bdgrab(struct block_device *bdev)
952{
953 ihold(bdev->bd_inode);
954 return bdev;
955}
956EXPORT_SYMBOL(bdgrab);
957
958long nr_blockdev_pages(void)
959{
960 struct block_device *bdev;
961 long ret = 0;
962 spin_lock(&bdev_lock);
963 list_for_each_entry(bdev, &all_bdevs, bd_list) {
964 ret += bdev->bd_inode->i_mapping->nrpages;
965 }
966 spin_unlock(&bdev_lock);
967 return ret;
968}
969
970void bdput(struct block_device *bdev)
971{
972 iput(bdev->bd_inode);
973}
974
975EXPORT_SYMBOL(bdput);
976
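/*
 * Find the block_device for a device special inode, taking a reference.
 * The cached inode->i_bdev is only trusted while its bdev inode is still
 * hashed; otherwise it is forgotten and looked up again via bdget().
 */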
977static struct block_device *bd_acquire(struct inode *inode)
978{
979 struct block_device *bdev;
980
981 spin_lock(&bdev_lock);
982 bdev = inode->i_bdev;
983 if (bdev && !inode_unhashed(bdev->bd_inode)) {
984 bdgrab(bdev);
985 spin_unlock(&bdev_lock);
986 return bdev;
987 }
988 spin_unlock(&bdev_lock);
989
990
991
992
993
994
995
996 if (bdev)
997 bd_forget(inode);
998
999 bdev = bdget(inode->i_rdev);
1000 if (bdev) {
1001 spin_lock(&bdev_lock);
1002 if (!inode->i_bdev) {
1003
1004
1005
1006
1007
1008
1009 bdgrab(bdev);
1010 inode->i_bdev = bdev;
1011 inode->i_mapping = bdev->bd_inode->i_mapping;
1012 }
1013 spin_unlock(&bdev_lock);
1014 }
1015 return bdev;
1016}

/* Call when you free inode */
1020void bd_forget(struct inode *inode)
1021{
1022 struct block_device *bdev = NULL;
1023
1024 spin_lock(&bdev_lock);
1025 if (!sb_is_blkdev_sb(inode->i_sb))
1026 bdev = inode->i_bdev;
1027 inode->i_bdev = NULL;
1028 inode->i_mapping = &inode->i_data;
1029 spin_unlock(&bdev_lock);
1030
1031 if (bdev)
1032 bdput(bdev);
1033}

/**
 * bd_may_claim - test whether a block device can be claimed
 * @bdev: block device of interest
 * @whole: whole block device containing @bdev, may equal @bdev
 * @holder: holder trying to claim @bdev
 *
 * Test whether @bdev can be claimed by @holder.
 *
 * CONTEXT:
 * spin_lock(&bdev_lock).
 *
 * RETURNS:
 * %true if @bdev can be claimed, %false otherwise.
 */
1049static bool bd_may_claim(struct block_device *bdev, struct block_device *whole,
1050 void *holder)
1051{
1052 if (bdev->bd_holder == holder)
1053 return true;
1054 else if (bdev->bd_holder != NULL)
1055 return false;
1056 else if (whole == bdev)
1057 return true;
1058
1059 else if (whole->bd_holder == bd_may_claim)
1060 return true;
1061 else if (whole->bd_holder != NULL)
1062 return false;
1063 else
1064 return true;
1065}

/**
 * bd_prepare_to_claim - prepare to claim a block device
 * @bdev: block device of interest
 * @whole: the whole device containing @bdev, may equal @bdev
 * @holder: holder trying to claim @bdev
 *
 * Prepare to claim @bdev.  This function fails if @bdev is already
 * claimed by another holder and waits if another claiming is in
 * progress.  It doesn't actually claim anything: on successful return
 * the caller owns bd_claiming and bd_holder[s].
 *
 * CONTEXT:
 * spin_lock(&bdev_lock).  Might release bdev_lock, sleep and regrab
 * it multiple times.
 *
 * RETURNS:
 * 0 if @bdev can be claimed, -EBUSY otherwise.
 */
1085static int bd_prepare_to_claim(struct block_device *bdev,
1086 struct block_device *whole, void *holder)
1087{
1088retry:
1089
1090 if (!bd_may_claim(bdev, whole, holder))
1091 return -EBUSY;
1092
1093
1094 if (whole->bd_claiming) {
1095 wait_queue_head_t *wq = bit_waitqueue(&whole->bd_claiming, 0);
1096 DEFINE_WAIT(wait);
1097
1098 prepare_to_wait(wq, &wait, TASK_UNINTERRUPTIBLE);
1099 spin_unlock(&bdev_lock);
1100 schedule();
1101 finish_wait(wq, &wait);
1102 spin_lock(&bdev_lock);
1103 goto retry;
1104 }
1105
1106
1107 return 0;
1108}
1109
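/*
 * Look up the gendisk and partition number for @bdev.  Returns NULL when
 * the device has no gendisk or when the bdev inode was unhashed underneath
 * us, i.e. the device was removed while we were looking up the disk.
 */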
1110static struct gendisk *bdev_get_gendisk(struct block_device *bdev, int *partno)
1111{
1112 struct gendisk *disk = get_gendisk(bdev->bd_dev, partno);
1113
1114 if (!disk)
1115 return NULL;
1116
1117
1118
1119
1120
1121
1122
1123
1124 if (inode_unhashed(bdev->bd_inode)) {
1125 put_disk_and_module(disk);
1126 return NULL;
1127 }
1128 return disk;
1129}

/**
 * bd_start_claiming - start claiming a block device
 * @bdev: block device of interest
 * @holder: holder trying to claim @bdev
 *
 * @bdev is about to be opened exclusively.  Check that @bdev can be
 * opened exclusively and mark that an exclusive open is in progress by
 * setting bd_claiming on the whole device.  The caller must later clear
 * bd_claiming (and wake bd_claiming waiters) once it has either installed
 * the holder or given up, as blkdev_get() does.
 *
 * CONTEXT:
 * Might sleep.
 *
 * RETURNS:
 * Pointer to the whole block device containing @bdev on success,
 * ERR_PTR() value on failure.
 */
1154static struct block_device *bd_start_claiming(struct block_device *bdev,
1155 void *holder)
1156{
1157 struct gendisk *disk;
1158 struct block_device *whole;
1159 int partno, err;
1160
1161 might_sleep();
1162
1163
1164
1165
1166
1167 disk = bdev_get_gendisk(bdev, &partno);
1168 if (!disk)
1169 return ERR_PTR(-ENXIO);
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179 if (partno)
1180 whole = bdget_disk(disk, 0);
1181 else
1182 whole = bdgrab(bdev);
1183
1184 put_disk_and_module(disk);
1185 if (!whole)
1186 return ERR_PTR(-ENOMEM);
1187
1188
1189 spin_lock(&bdev_lock);
1190
1191 err = bd_prepare_to_claim(bdev, whole, holder);
1192 if (err == 0) {
1193 whole->bd_claiming = holder;
1194 spin_unlock(&bdev_lock);
1195 return whole;
1196 } else {
1197 spin_unlock(&bdev_lock);
1198 bdput(whole);
1199 return ERR_PTR(err);
1200 }
1201}
1202
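/*
 * Holder/slave relationships for stacked drivers (dm, md): each claimed
 * member device gets "holders"/"slaves" symlinks in sysfs so user space
 * can see which disks a stacked device is built from.
 */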
1203#ifdef CONFIG_SYSFS
1204struct bd_holder_disk {
1205 struct list_head list;
1206 struct gendisk *disk;
1207 int refcnt;
1208};
1209
1210static struct bd_holder_disk *bd_find_holder_disk(struct block_device *bdev,
1211 struct gendisk *disk)
1212{
1213 struct bd_holder_disk *holder;
1214
1215 list_for_each_entry(holder, &bdev->bd_holder_disks, list)
1216 if (holder->disk == disk)
1217 return holder;
1218 return NULL;
1219}
1220
1221static int add_symlink(struct kobject *from, struct kobject *to)
1222{
1223 return sysfs_create_link(from, to, kobject_name(to));
1224}
1225
1226static void del_symlink(struct kobject *from, struct kobject *to)
1227{
1228 sysfs_remove_link(from, kobject_name(to));
1229}

/**
 * bd_link_disk_holder - create symlinks between holding disk and slave bdev
 * @bdev: the claimed slave bdev
 * @disk: the holding disk
 *
 * DON'T USE THIS UNLESS YOU'RE ALREADY USING IT.
 *
 * This function creates the following sysfs symlinks:
 *
 * - from "slaves" directory of the holder @disk to the claimed @bdev
 * - from "holders" directory of the @bdev to the holder @disk
 *
 * For example, if /dev/dm-0 maps to /dev/sda and the disk for dm-0 is
 * passed to bd_link_disk_holder(), then:
 *
 *   /sys/block/dm-0/slaves/sda --> /sys/block/sda
 *   /sys/block/sda/holders/dm-0 --> /sys/block/dm-0
 *
 * The caller must have claimed @bdev before calling this function and
 * ensure that both @bdev and @disk are valid during the creation and
 * lifetime of these symlinks.
 *
 * CONTEXT:
 * Might sleep.
 *
 * RETURNS:
 * 0 on success, -errno on failure.
 */
1259int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk)
1260{
1261 struct bd_holder_disk *holder;
1262 int ret = 0;
1263
1264 mutex_lock(&bdev->bd_mutex);
1265
1266 WARN_ON_ONCE(!bdev->bd_holder);
1267
1268
1269 if (WARN_ON(!disk->slave_dir || !bdev->bd_part->holder_dir))
1270 goto out_unlock;
1271
1272 holder = bd_find_holder_disk(bdev, disk);
1273 if (holder) {
1274 holder->refcnt++;
1275 goto out_unlock;
1276 }
1277
1278 holder = kzalloc(sizeof(*holder), GFP_KERNEL);
1279 if (!holder) {
1280 ret = -ENOMEM;
1281 goto out_unlock;
1282 }
1283
1284 INIT_LIST_HEAD(&holder->list);
1285 holder->disk = disk;
1286 holder->refcnt = 1;
1287
1288 ret = add_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj);
1289 if (ret)
1290 goto out_free;
1291
1292 ret = add_symlink(bdev->bd_part->holder_dir, &disk_to_dev(disk)->kobj);
1293 if (ret)
1294 goto out_del;
1295
1296
1297
1298
1299 kobject_get(bdev->bd_part->holder_dir);
1300
1301 list_add(&holder->list, &bdev->bd_holder_disks);
1302 goto out_unlock;
1303
1304out_del:
1305 del_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj);
1306out_free:
1307 kfree(holder);
1308out_unlock:
1309 mutex_unlock(&bdev->bd_mutex);
1310 return ret;
1311}
1312EXPORT_SYMBOL_GPL(bd_link_disk_holder);

/**
 * bd_unlink_disk_holder - destroy symlinks created by bd_link_disk_holder()
 * @bdev: the claimed slave bdev
 * @disk: the holding disk
 *
 * DON'T USE THIS UNLESS YOU'RE ALREADY USING IT.
 *
 * CONTEXT:
 * Might sleep.
 */
1324void bd_unlink_disk_holder(struct block_device *bdev, struct gendisk *disk)
1325{
1326 struct bd_holder_disk *holder;
1327
1328 mutex_lock(&bdev->bd_mutex);
1329
1330 holder = bd_find_holder_disk(bdev, disk);
1331
1332 if (!WARN_ON_ONCE(holder == NULL) && !--holder->refcnt) {
1333 del_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj);
1334 del_symlink(bdev->bd_part->holder_dir,
1335 &disk_to_dev(disk)->kobj);
1336 kobject_put(bdev->bd_part->holder_dir);
1337 list_del_init(&holder->list);
1338 kfree(holder);
1339 }
1340
1341 mutex_unlock(&bdev->bd_mutex);
1342}
1343EXPORT_SYMBOL_GPL(bd_unlink_disk_holder);
1344#endif

/**
 * flush_disk - invalidates all buffer-cache entries on a disk
 * @bdev:	struct block device to be flushed
 * @kill_dirty: flag to guide handling of dirty inodes
 *
 * Invalidates all buffer-cache entries on a disk.  It should be called
 * when a disk has been changed -- either by a media change or an online
 * resize.
 */
1356static void flush_disk(struct block_device *bdev, bool kill_dirty)
1357{
1358 if (__invalidate_device(bdev, kill_dirty)) {
1359 printk(KERN_WARNING "VFS: busy inodes on changed media or "
1360 "resized disk %s\n",
1361 bdev->bd_disk ? bdev->bd_disk->disk_name : "");
1362 }
1363
1364 if (!bdev->bd_disk)
1365 return;
1366 if (disk_part_scan_enabled(bdev->bd_disk))
1367 bdev->bd_invalidated = 1;
1368}

/**
 * check_disk_size_change - checks for a disk size change and adjusts bdev size.
 * @disk: struct gendisk to check
 * @bdev: struct bdev to adjust.
 * @verbose: if %true log a message about a size change if there is any
 *
 * This routine checks to see if the bdev size does not match the disk size
 * and adjusts it if it differs.  When shrinking the bdev size, all its
 * caches are freed.
 */
1380void check_disk_size_change(struct gendisk *disk, struct block_device *bdev,
1381 bool verbose)
1382{
1383 loff_t disk_size, bdev_size;
1384
1385 disk_size = (loff_t)get_capacity(disk) << 9;
1386 bdev_size = i_size_read(bdev->bd_inode);
1387 if (disk_size != bdev_size) {
1388 if (verbose) {
1389 printk(KERN_INFO
1390 "%s: detected capacity change from %lld to %lld\n",
1391 disk->disk_name, bdev_size, disk_size);
1392 }
1393 i_size_write(bdev->bd_inode, disk_size);
1394 if (bdev_size > disk_size)
1395 flush_disk(bdev, false);
1396 }
1397}

/**
 * revalidate_disk - wrapper for lower-level driver's revalidate_disk call-back
 * @disk: struct gendisk to be revalidated
 *
 * This routine is a wrapper for lower-level driver's revalidate_disk
 * call-backs.  It is used to do common pre and post operations needed
 * for all revalidate_disk operations.
 */
1407int revalidate_disk(struct gendisk *disk)
1408{
1409 int ret = 0;
1410
1411 if (disk->fops->revalidate_disk)
1412 ret = disk->fops->revalidate_disk(disk);
1413
1414
1415
1416
1417
1418 if (!(disk->flags & GENHD_FL_HIDDEN)) {
1419 struct block_device *bdev = bdget_disk(disk, 0);
1420
1421 if (!bdev)
1422 return ret;
1423
1424 mutex_lock(&bdev->bd_mutex);
1425 check_disk_size_change(disk, bdev, ret == 0);
1426 bdev->bd_invalidated = 0;
1427 mutex_unlock(&bdev->bd_mutex);
1428 bdput(bdev);
1429 }
1430 return ret;
1431}
1432EXPORT_SYMBOL(revalidate_disk);

/**
 * check_disk_change - check whether a removable media has been changed
 * @bdev: block device to check
 *
 * Check whether a removable media has been changed, and if so flush the
 * device's caches and ask the driver to revalidate it.
 *
 * Returns %1 if the media has changed, %0 otherwise.
 */
1443int check_disk_change(struct block_device *bdev)
1444{
1445 struct gendisk *disk = bdev->bd_disk;
1446 const struct block_device_operations *bdops = disk->fops;
1447 unsigned int events;
1448
1449 events = disk_clear_events(disk, DISK_EVENT_MEDIA_CHANGE |
1450 DISK_EVENT_EJECT_REQUEST);
1451 if (!(events & DISK_EVENT_MEDIA_CHANGE))
1452 return 0;
1453
1454 flush_disk(bdev, true);
1455 if (bdops->revalidate_disk)
1456 bdops->revalidate_disk(bdev->bd_disk);
1457 return 1;
1458}
1459
1460EXPORT_SYMBOL(check_disk_change);
1461
1462void bd_set_size(struct block_device *bdev, loff_t size)
1463{
1464 inode_lock(bdev->bd_inode);
1465 i_size_write(bdev->bd_inode, size);
1466 inode_unlock(bdev->bd_inode);
1467}
1468EXPORT_SYMBOL(bd_set_size);
1469
1470static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part);

/*
 * bd_mutex locking:
 *
 *  mutex_lock(part->bd_mutex)
 *    mutex_lock_nested(whole->bd_mutex, 1)
 */

1479static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
1480{
1481 struct gendisk *disk;
1482 int ret;
1483 int partno;
1484 int perm = 0;
1485 bool first_open = false;
1486
1487 if (mode & FMODE_READ)
1488 perm |= MAY_READ;
1489 if (mode & FMODE_WRITE)
1490 perm |= MAY_WRITE;
1491
1492
1493
1494 if (!for_part) {
1495 ret = devcgroup_inode_permission(bdev->bd_inode, perm);
1496 if (ret != 0) {
1497 bdput(bdev);
1498 return ret;
1499 }
1500 }
1501
1502 restart:
1503
1504 ret = -ENXIO;
1505 disk = bdev_get_gendisk(bdev, &partno);
1506 if (!disk)
1507 goto out;
1508
1509 disk_block_events(disk);
1510 mutex_lock_nested(&bdev->bd_mutex, for_part);
1511 if (!bdev->bd_openers) {
1512 first_open = true;
1513 bdev->bd_disk = disk;
1514 bdev->bd_queue = disk->queue;
1515 bdev->bd_contains = bdev;
1516 bdev->bd_partno = partno;
1517
1518 if (!partno) {
1519 ret = -ENXIO;
1520 bdev->bd_part = disk_get_part(disk, partno);
1521 if (!bdev->bd_part)
1522 goto out_clear;
1523
1524 ret = 0;
1525 if (disk->fops->open) {
1526 ret = disk->fops->open(bdev, mode);
1527 if (ret == -ERESTARTSYS) {
1528
1529
1530
1531
1532 disk_put_part(bdev->bd_part);
1533 bdev->bd_part = NULL;
1534 bdev->bd_disk = NULL;
1535 bdev->bd_queue = NULL;
1536 mutex_unlock(&bdev->bd_mutex);
1537 disk_unblock_events(disk);
1538 put_disk_and_module(disk);
1539 goto restart;
1540 }
1541 }
1542
1543 if (!ret) {
1544 bd_set_size(bdev,(loff_t)get_capacity(disk)<<9);
1545 set_init_blocksize(bdev);
1546 }
1547
1548
1549
1550
1551
1552
1553
1554 if (bdev->bd_invalidated) {
1555 if (!ret)
1556 rescan_partitions(disk, bdev);
1557 else if (ret == -ENOMEDIUM)
1558 invalidate_partitions(disk, bdev);
1559 }
1560
1561 if (ret)
1562 goto out_clear;
1563 } else {
1564 struct block_device *whole;
1565 whole = bdget_disk(disk, 0);
1566 ret = -ENOMEM;
1567 if (!whole)
1568 goto out_clear;
1569 BUG_ON(for_part);
1570 ret = __blkdev_get(whole, mode, 1);
1571 if (ret)
1572 goto out_clear;
1573 bdev->bd_contains = whole;
1574 bdev->bd_part = disk_get_part(disk, partno);
1575 if (!(disk->flags & GENHD_FL_UP) ||
1576 !bdev->bd_part || !bdev->bd_part->nr_sects) {
1577 ret = -ENXIO;
1578 goto out_clear;
1579 }
1580 bd_set_size(bdev, (loff_t)bdev->bd_part->nr_sects << 9);
1581 set_init_blocksize(bdev);
1582 }
1583
1584 if (bdev->bd_bdi == &noop_backing_dev_info)
1585 bdev->bd_bdi = bdi_get(disk->queue->backing_dev_info);
1586 } else {
1587 if (bdev->bd_contains == bdev) {
1588 ret = 0;
1589 if (bdev->bd_disk->fops->open)
1590 ret = bdev->bd_disk->fops->open(bdev, mode);
1591
1592 if (bdev->bd_invalidated) {
1593 if (!ret)
1594 rescan_partitions(bdev->bd_disk, bdev);
1595 else if (ret == -ENOMEDIUM)
1596 invalidate_partitions(bdev->bd_disk, bdev);
1597 }
1598 if (ret)
1599 goto out_unlock_bdev;
1600 }
1601 }
1602 bdev->bd_openers++;
1603 if (for_part)
1604 bdev->bd_part_count++;
1605 mutex_unlock(&bdev->bd_mutex);
1606 disk_unblock_events(disk);
1607
1608 if (!first_open)
1609 put_disk_and_module(disk);
1610 return 0;
1611
1612 out_clear:
1613 disk_put_part(bdev->bd_part);
1614 bdev->bd_disk = NULL;
1615 bdev->bd_part = NULL;
1616 bdev->bd_queue = NULL;
1617 if (bdev != bdev->bd_contains)
1618 __blkdev_put(bdev->bd_contains, mode, 1);
1619 bdev->bd_contains = NULL;
1620 out_unlock_bdev:
1621 mutex_unlock(&bdev->bd_mutex);
1622 disk_unblock_events(disk);
1623 put_disk_and_module(disk);
1624 out:
1625 bdput(bdev);
1626
1627 return ret;
1628}

/**
 * blkdev_get - open a block device
 * @bdev: block_device to open
 * @mode: FMODE_* mask
 * @holder: exclusive holder identifier
 *
 * Open @bdev with @mode.  If @mode includes %FMODE_EXCL, @bdev is
 * opened with exclusive access.  Specifying %FMODE_EXCL with a %NULL
 * @holder is invalid.  Exclusive opens may nest for the same @holder.
 *
 * On success, the reference count of @bdev is unchanged.  On failure,
 * @bdev is put.
 *
 * CONTEXT:
 * Might sleep.
 *
 * RETURNS:
 * 0 on success, -errno on failure.
 */
1649int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder)
1650{
1651 struct block_device *whole = NULL;
1652 int res;
1653
1654 WARN_ON_ONCE((mode & FMODE_EXCL) && !holder);
1655
1656 if ((mode & FMODE_EXCL) && holder) {
1657 whole = bd_start_claiming(bdev, holder);
1658 if (IS_ERR(whole)) {
1659 bdput(bdev);
1660 return PTR_ERR(whole);
1661 }
1662 }
1663
1664 res = __blkdev_get(bdev, mode, 0);
1665
1666 if (whole) {
1667 struct gendisk *disk = whole->bd_disk;
1668
1669
1670 mutex_lock(&bdev->bd_mutex);
1671 spin_lock(&bdev_lock);
1672
1673 if (!res) {
1674 BUG_ON(!bd_may_claim(bdev, whole, holder));
1675
1676
1677
1678
1679
1680
1681 whole->bd_holders++;
1682 whole->bd_holder = bd_may_claim;
1683 bdev->bd_holders++;
1684 bdev->bd_holder = holder;
1685 }
1686
1687
1688 BUG_ON(whole->bd_claiming != holder);
1689 whole->bd_claiming = NULL;
1690 wake_up_bit(&whole->bd_claiming, 0);
1691
1692 spin_unlock(&bdev_lock);
1693
1694
1695
1696
1697
1698
1699
1700
1701 if (!res && (mode & FMODE_WRITE) && !bdev->bd_write_holder &&
1702 (disk->flags & GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE)) {
1703 bdev->bd_write_holder = true;
1704 disk_block_events(disk);
1705 }
1706
1707 mutex_unlock(&bdev->bd_mutex);
1708 bdput(whole);
1709 }
1710
1711 return res;
1712}
1713EXPORT_SYMBOL(blkdev_get);

/**
 * blkdev_get_by_path - open a block device by name
 * @path: path to the block device to open
 * @mode: FMODE_* mask
 * @holder: exclusive holder identifier
 *
 * Open the block device described by the device file at @path.  @mode
 * and @holder are identical to blkdev_get().
 *
 * On success, the returned block_device has a reference count of one.
 *
 * CONTEXT:
 * Might sleep.
 *
 * RETURNS:
 * Pointer to block_device on success, ERR_PTR(-errno) on failure.
 */
1732struct block_device *blkdev_get_by_path(const char *path, fmode_t mode,
1733 void *holder)
1734{
1735 struct block_device *bdev;
1736 int err;
1737
1738 bdev = lookup_bdev(path);
1739 if (IS_ERR(bdev))
1740 return bdev;
1741
1742 err = blkdev_get(bdev, mode, holder);
1743 if (err)
1744 return ERR_PTR(err);
1745
1746 if ((mode & FMODE_WRITE) && bdev_read_only(bdev)) {
1747 blkdev_put(bdev, mode);
1748 return ERR_PTR(-EACCES);
1749 }
1750
1751 return bdev;
1752}
1753EXPORT_SYMBOL(blkdev_get_by_path);

/**
 * blkdev_get_by_dev - open a block device by device number
 * @dev: device number of block device to open
 * @mode: FMODE_* mask
 * @holder: exclusive holder identifier
 *
 * Open the block device described by device number @dev.  @mode and
 * @holder are identical to blkdev_get().
 *
 * Use it ONLY if you really do not have anything better - i.e. when
 * you are behind a truly sucky interface and all you are given is a
 * device number.  _Never_ to be used for internal purposes.  If you
 * ever need it - reconsider your API.
 *
 * On success, the returned block_device has a reference count of one.
 *
 * CONTEXT:
 * Might sleep.
 *
 * RETURNS:
 * Pointer to block_device on success, ERR_PTR(-errno) on failure.
 */
1777struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder)
1778{
1779 struct block_device *bdev;
1780 int err;
1781
1782 bdev = bdget(dev);
1783 if (!bdev)
1784 return ERR_PTR(-ENOMEM);
1785
1786 err = blkdev_get(bdev, mode, holder);
1787 if (err)
1788 return ERR_PTR(err);
1789
1790 return bdev;
1791}
1792EXPORT_SYMBOL(blkdev_get_by_dev);
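/*
 * Illustrative usage sketch (not part of this file): a driver that opens a
 * device exclusively pairs blkdev_get_by_path() with blkdev_put(), passing
 * the same mode both times.  "/dev/sda1" and my_holder are placeholders;
 * the holder cookie identifies the exclusive owner and is required whenever
 * FMODE_EXCL is set.
 *
 *	struct block_device *bdev;
 *
 *	bdev = blkdev_get_by_path("/dev/sda1",
 *				  FMODE_READ | FMODE_WRITE | FMODE_EXCL,
 *				  my_holder);
 *	if (IS_ERR(bdev))
 *		return PTR_ERR(bdev);
 *	...
 *	blkdev_put(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
 */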
1793
1794static int blkdev_open(struct inode * inode, struct file * filp)
1795{
1796 struct block_device *bdev;
1797
1798
1799
1800
1801
1802
1803
1804 filp->f_flags |= O_LARGEFILE;
1805
1806 filp->f_mode |= FMODE_NOWAIT;
1807
1808 if (filp->f_flags & O_NDELAY)
1809 filp->f_mode |= FMODE_NDELAY;
1810 if (filp->f_flags & O_EXCL)
1811 filp->f_mode |= FMODE_EXCL;
1812 if ((filp->f_flags & O_ACCMODE) == 3)
1813 filp->f_mode |= FMODE_WRITE_IOCTL;
1814
1815 bdev = bd_acquire(inode);
1816 if (bdev == NULL)
1817 return -ENOMEM;
1818
1819 filp->f_mapping = bdev->bd_inode->i_mapping;
1820 filp->f_wb_err = filemap_sample_wb_err(filp->f_mapping);
1821
1822 return blkdev_get(bdev, filp->f_mode, filp);
1823}
1824
1825static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part)
1826{
1827 struct gendisk *disk = bdev->bd_disk;
1828 struct block_device *victim = NULL;
1829
1830 mutex_lock_nested(&bdev->bd_mutex, for_part);
1831 if (for_part)
1832 bdev->bd_part_count--;
1833
1834 if (!--bdev->bd_openers) {
1835 WARN_ON_ONCE(bdev->bd_holders);
1836 sync_blockdev(bdev);
1837 kill_bdev(bdev);
1838
1839 bdev_write_inode(bdev);
1840 }
1841 if (bdev->bd_contains == bdev) {
1842 if (disk->fops->release)
1843 disk->fops->release(disk, mode);
1844 }
1845 if (!bdev->bd_openers) {
1846 disk_put_part(bdev->bd_part);
1847 bdev->bd_part = NULL;
1848 bdev->bd_disk = NULL;
1849 if (bdev != bdev->bd_contains)
1850 victim = bdev->bd_contains;
1851 bdev->bd_contains = NULL;
1852
1853 put_disk_and_module(disk);
1854 }
1855 mutex_unlock(&bdev->bd_mutex);
1856 bdput(bdev);
1857 if (victim)
1858 __blkdev_put(victim, mode, 1);
1859}
1860
1861void blkdev_put(struct block_device *bdev, fmode_t mode)
1862{
1863 mutex_lock(&bdev->bd_mutex);
1864
1865 if (mode & FMODE_EXCL) {
1866 bool bdev_free;
1867
1868
1869
1870
1871
1872
1873 spin_lock(&bdev_lock);
1874
1875 WARN_ON_ONCE(--bdev->bd_holders < 0);
1876 WARN_ON_ONCE(--bdev->bd_contains->bd_holders < 0);
1877
1878
1879 if ((bdev_free = !bdev->bd_holders))
1880 bdev->bd_holder = NULL;
1881 if (!bdev->bd_contains->bd_holders)
1882 bdev->bd_contains->bd_holder = NULL;
1883
1884 spin_unlock(&bdev_lock);
1885
1886
1887
1888
1889
1890 if (bdev_free && bdev->bd_write_holder) {
1891 disk_unblock_events(bdev->bd_disk);
1892 bdev->bd_write_holder = false;
1893 }
1894 }
1895
1896
1897
1898
1899
1900
1901 disk_flush_events(bdev->bd_disk, DISK_EVENT_MEDIA_CHANGE);
1902
1903 mutex_unlock(&bdev->bd_mutex);
1904
1905 __blkdev_put(bdev, mode, 0);
1906}
1907EXPORT_SYMBOL(blkdev_put);
1908
1909static int blkdev_close(struct inode * inode, struct file * filp)
1910{
1911 struct block_device *bdev = I_BDEV(bdev_file_inode(filp));
1912 blkdev_put(bdev, filp->f_mode);
1913 return 0;
1914}
1915
1916static long block_ioctl(struct file *file, unsigned cmd, unsigned long arg)
1917{
1918 struct block_device *bdev = I_BDEV(bdev_file_inode(file));
1919 fmode_t mode = file->f_mode;
1920
1921
1922
1923
1924
1925 if (file->f_flags & O_NDELAY)
1926 mode |= FMODE_NDELAY;
1927 else
1928 mode &= ~FMODE_NDELAY;
1929
1930 return blkdev_ioctl(bdev, mode, cmd, arg);
1931}
1932

/*
 * Write data to the block device.  Only intended for the block device itself
 * and the raw driver which basically is a fake block device.
 *
 * Does not take i_mutex for the write and thus is not for general purpose
 * use.
 */
1940ssize_t blkdev_write_iter(struct kiocb *iocb, struct iov_iter *from)
1941{
1942 struct file *file = iocb->ki_filp;
1943 struct inode *bd_inode = bdev_file_inode(file);
1944 loff_t size = i_size_read(bd_inode);
1945 struct blk_plug plug;
1946 ssize_t ret;
1947
1948 if (bdev_read_only(I_BDEV(bd_inode)))
1949 return -EPERM;
1950
1951 if (!iov_iter_count(from))
1952 return 0;
1953
1954 if (iocb->ki_pos >= size)
1955 return -ENOSPC;
1956
1957 if ((iocb->ki_flags & (IOCB_NOWAIT | IOCB_DIRECT)) == IOCB_NOWAIT)
1958 return -EOPNOTSUPP;
1959
1960 iov_iter_truncate(from, size - iocb->ki_pos);
1961
1962 blk_start_plug(&plug);
1963 ret = __generic_file_write_iter(iocb, from);
1964 if (ret > 0)
1965 ret = generic_write_sync(iocb, ret);
1966 blk_finish_plug(&plug);
1967 return ret;
1968}
1969EXPORT_SYMBOL_GPL(blkdev_write_iter);
1970
1971ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to)
1972{
1973 struct file *file = iocb->ki_filp;
1974 struct inode *bd_inode = bdev_file_inode(file);
1975 loff_t size = i_size_read(bd_inode);
1976 loff_t pos = iocb->ki_pos;
1977
1978 if (pos >= size)
1979 return 0;
1980
1981 size -= pos;
1982 iov_iter_truncate(to, size);
1983 return generic_file_read_iter(iocb, to);
1984}
1985EXPORT_SYMBOL_GPL(blkdev_read_iter);

/*
 * Try to release a page associated with a block device when the system
 * is under memory pressure.
 */
1991static int blkdev_releasepage(struct page *page, gfp_t wait)
1992{
1993 struct super_block *super = BDEV_I(page->mapping->host)->bdev.bd_super;
1994
1995 if (super && super->s_op->bdev_try_to_free_page)
1996 return super->s_op->bdev_try_to_free_page(super, page, wait);
1997
1998 return try_to_free_buffers(page);
1999}
2000
2001static int blkdev_writepages(struct address_space *mapping,
2002 struct writeback_control *wbc)
2003{
2004 return generic_writepages(mapping, wbc);
2005}
2006
2007static const struct address_space_operations def_blk_aops = {
2008 .readpage = blkdev_readpage,
2009 .readpages = blkdev_readpages,
2010 .writepage = blkdev_writepage,
2011 .write_begin = blkdev_write_begin,
2012 .write_end = blkdev_write_end,
2013 .writepages = blkdev_writepages,
2014 .releasepage = blkdev_releasepage,
2015 .direct_IO = blkdev_direct_IO,
2016 .migratepage = buffer_migrate_page_norefs,
2017 .is_dirty_writeback = buffer_check_dirty_writeback,
2018};
2019
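/*
 * fallocate() modes supported on block devices.  The size of a device
 * cannot change, so zeroing maps to blkdev_issue_zeroout() and punching a
 * hole (which must be combined with FALLOC_FL_KEEP_SIZE) maps to zeroout
 * with NOFALLBACK or, with FALLOC_FL_NO_HIDE_STALE, to a plain discard.
 */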
2020#define BLKDEV_FALLOC_FL_SUPPORTED \
2021 (FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE | \
2022 FALLOC_FL_ZERO_RANGE | FALLOC_FL_NO_HIDE_STALE)
2023
2024static long blkdev_fallocate(struct file *file, int mode, loff_t start,
2025 loff_t len)
2026{
2027 struct block_device *bdev = I_BDEV(bdev_file_inode(file));
2028 struct address_space *mapping;
2029 loff_t end = start + len - 1;
2030 loff_t isize;
2031 int error;
2032
2033
2034 if (mode & ~BLKDEV_FALLOC_FL_SUPPORTED)
2035 return -EOPNOTSUPP;
2036
2037
2038 isize = i_size_read(bdev->bd_inode);
2039 if (start >= isize)
2040 return -EINVAL;
2041 if (end >= isize) {
2042 if (mode & FALLOC_FL_KEEP_SIZE) {
2043 len = isize - start;
2044 end = start + len - 1;
2045 } else
2046 return -EINVAL;
2047 }
2048
2049
2050
2051
2052 if ((start | len) & (bdev_logical_block_size(bdev) - 1))
2053 return -EINVAL;
2054
2055
2056 mapping = bdev->bd_inode->i_mapping;
2057 truncate_inode_pages_range(mapping, start, end);
2058
2059 switch (mode) {
2060 case FALLOC_FL_ZERO_RANGE:
2061 case FALLOC_FL_ZERO_RANGE | FALLOC_FL_KEEP_SIZE:
2062 error = blkdev_issue_zeroout(bdev, start >> 9, len >> 9,
2063 GFP_KERNEL, BLKDEV_ZERO_NOUNMAP);
2064 break;
2065 case FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE:
2066 error = blkdev_issue_zeroout(bdev, start >> 9, len >> 9,
2067 GFP_KERNEL, BLKDEV_ZERO_NOFALLBACK);
2068 break;
2069 case FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE | FALLOC_FL_NO_HIDE_STALE:
2070 error = blkdev_issue_discard(bdev, start >> 9, len >> 9,
2071 GFP_KERNEL, 0);
2072 break;
2073 default:
2074 return -EOPNOTSUPP;
2075 }
2076 if (error)
2077 return error;
2078
2079
2080
2081
2082
2083
2084 return invalidate_inode_pages2_range(mapping,
2085 start >> PAGE_SHIFT,
2086 end >> PAGE_SHIFT);
2087}
2088
2089const struct file_operations def_blk_fops = {
2090 .open = blkdev_open,
2091 .release = blkdev_close,
2092 .llseek = block_llseek,
2093 .read_iter = blkdev_read_iter,
2094 .write_iter = blkdev_write_iter,
2095 .iopoll = blkdev_iopoll,
2096 .mmap = generic_file_mmap,
2097 .fsync = blkdev_fsync,
2098 .unlocked_ioctl = block_ioctl,
2099#ifdef CONFIG_COMPAT
2100 .compat_ioctl = compat_blkdev_ioctl,
2101#endif
2102 .splice_read = generic_file_splice_read,
2103 .splice_write = iter_file_splice_write,
2104 .fallocate = blkdev_fallocate,
2105};
2106
2107int ioctl_by_bdev(struct block_device *bdev, unsigned cmd, unsigned long arg)
2108{
2109 int res;
2110 mm_segment_t old_fs = get_fs();
2111 set_fs(KERNEL_DS);
2112 res = blkdev_ioctl(bdev, 0, cmd, arg);
2113 set_fs(old_fs);
2114 return res;
2115}
2116
2117EXPORT_SYMBOL(ioctl_by_bdev);

/**
 * lookup_bdev  - lookup a struct block_device by name
 * @pathname:	special file representing the block device
 *
 * Get a reference to the blockdevice at @pathname in the current
 * namespace if possible and return it.  Return ERR_PTR(error)
 * otherwise.
 */
2127struct block_device *lookup_bdev(const char *pathname)
2128{
2129 struct block_device *bdev;
2130 struct inode *inode;
2131 struct path path;
2132 int error;
2133
2134 if (!pathname || !*pathname)
2135 return ERR_PTR(-EINVAL);
2136
2137 error = kern_path(pathname, LOOKUP_FOLLOW, &path);
2138 if (error)
2139 return ERR_PTR(error);
2140
2141 inode = d_backing_inode(path.dentry);
2142 error = -ENOTBLK;
2143 if (!S_ISBLK(inode->i_mode))
2144 goto fail;
2145 error = -EACCES;
2146 if (!may_open_dev(&path))
2147 goto fail;
2148 error = -ENOMEM;
2149 bdev = bd_acquire(inode);
2150 if (!bdev)
2151 goto fail;
2152out:
2153 path_put(&path);
2154 return bdev;
2155fail:
2156 bdev = ERR_PTR(error);
2157 goto out;
2158}
2159EXPORT_SYMBOL(lookup_bdev);
2160
2161int __invalidate_device(struct block_device *bdev, bool kill_dirty)
2162{
2163 struct super_block *sb = get_super(bdev);
2164 int res = 0;
2165
2166 if (sb) {
2167
2168
2169
2170
2171
2172
2173 shrink_dcache_sb(sb);
2174 res = invalidate_inodes(sb, kill_dirty);
2175 drop_super(sb);
2176 }
2177 invalidate_bdev(bdev);
2178 return res;
2179}
2180EXPORT_SYMBOL(__invalidate_device);
2181
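/*
 * Walk every block device inode and call @func on the devices that are
 * currently open.  The s_inode_list_lock is dropped while @func runs, so a
 * reference to the current inode is held (and the previous one released)
 * to keep the list position stable.
 */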
2182void iterate_bdevs(void (*func)(struct block_device *, void *), void *arg)
2183{
2184 struct inode *inode, *old_inode = NULL;
2185
2186 spin_lock(&blockdev_superblock->s_inode_list_lock);
2187 list_for_each_entry(inode, &blockdev_superblock->s_inodes, i_sb_list) {
2188 struct address_space *mapping = inode->i_mapping;
2189 struct block_device *bdev;
2190
2191 spin_lock(&inode->i_lock);
2192 if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW) ||
2193 mapping->nrpages == 0) {
2194 spin_unlock(&inode->i_lock);
2195 continue;
2196 }
2197 __iget(inode);
2198 spin_unlock(&inode->i_lock);
2199 spin_unlock(&blockdev_superblock->s_inode_list_lock);
2200
2201
2202
2203
2204
2205
2206
2207
2208 iput(old_inode);
2209 old_inode = inode;
2210 bdev = I_BDEV(inode);
2211
2212 mutex_lock(&bdev->bd_mutex);
2213 if (bdev->bd_openers)
2214 func(bdev, arg);
2215 mutex_unlock(&bdev->bd_mutex);
2216
2217 spin_lock(&blockdev_superblock->s_inode_list_lock);
2218 }
2219 spin_unlock(&blockdev_superblock->s_inode_list_lock);
2220 iput(old_inode);
2221}
2222